MiniOB 1
MiniOB is one mini database, helping developers to learn how database works.
载入中...
搜索中...
未找到
codec.h
1/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved.
2miniob is licensed under Mulan PSL v2.
3You can use this software according to the terms and conditions of the Mulan PSL v2.
4You may obtain a copy of Mulan PSL v2 at:
5 http://license.coscl.org.cn/MulanPSL2
6THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
7EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
8MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
9See the Mulan PSL v2 for more details. */
10
11#pragma once
12
13#include <cmath>
14#include "common/lang/span.h"
15#include "common/lang/string.h"
16#include "common/lang/vector.h"
17#include "common/log/log.h"
18#include "common/sys/rc.h"
19#include "common/value.h"
20#include <cstring>
21
22using byte_t = unsigned char;
23using bytes = vector<byte_t>;
24using float64_t = double_t;
25
26// reference: https://github.com/code0xff/orderedcodepp
28{
29public:
30 static const byte_t term[];
31 static const byte_t lit00[];
32 static const byte_t litff[];
33 static const byte_t inf[];
34 static const byte_t msb[];
35
36 static const byte_t increasing = 0x00;
37 static const byte_t decreasing = 0xff;
38
39 struct infinity
40 {
41 bool operator==(const infinity &i) const { return true; }
42
43 bool operator==(infinity &&i) { return true; }
44 };
45
46 template <typename T>
47 struct decr
48 {
49 T val;
50
51 bool operator==(const decr<T> &o) const { return val == o.val; }
52
53 bool operator==(decr<T> o) { return val == o.val; }
54 };
55
57 {
58 string s;
59 bool inf;
60 };
61
62 struct trailing_string : string
63 {};
64
65 static void invert(span<byte_t> &s)
66 {
67 std::for_each(s.begin(), s.end(), [](byte_t &c) { c ^= 0xff; });
68 }
69
70 static RC append(bytes &s, uint64_t x)
71 {
72 vector<byte_t> buf(9);
73 auto i = 8;
74 for (; x > 0; x >>= 8) {
75 buf[i--] = static_cast<byte_t>(x);
76 }
77 buf[i] = static_cast<byte_t>(8 - i);
78 s.insert(s.end(), buf.begin() + i, buf.end());
79 return RC::SUCCESS;
80 }
81
82 static RC append(bytes &s, int64_t x)
83 {
84 if (x >= -64 && x < 64) {
85 s.insert(s.end(), static_cast<byte_t>(x ^ 0x80));
86 return RC::SUCCESS;
87 }
88 bool neg = x < 0;
89 if (neg) {
90 x = ~x;
91 }
92 auto n = 1;
93 bytes buf(10);
94 auto i = 9;
95 for (; x > 0; x >>= 8) {
96 buf[i--] = static_cast<byte_t>(x);
97 n++;
98 }
99 bool lfb = n > 7;
100 if (lfb) {
101 n -= 7;
102 }
103 if (buf[i + 1] < 1 << (8 - n)) {
104 n--;
105 i++;
106 }
107 buf[i] |= msb[n];
108 if (lfb) {
109 buf[--i] = 0xff;
110 }
111 if (neg) {
112 span<byte_t> sp(&buf[i], buf.size() - i);
113 invert(sp);
114 }
115 s.insert(s.end(), buf.begin() + i, buf.end());
116 return RC::SUCCESS;
117 }
118
119 static RC append(bytes &s, float64_t x)
120 {
121 RC rc = RC::SUCCESS;
122 if (std::isnan(x)) {
123 LOG_WARN("append: NaN");
124 return RC::INVALID_ARGUMENT;
125 }
126 uint64_t b;
127 memcpy(&b, &x, sizeof(x));
128 auto i = int64_t(b);
129 if (i < 0) {
130 i = std::numeric_limits<int64_t>::min() - i;
131 }
132 if (OB_FAIL(append(s, i))) {
133 LOG_WARN("append: append failed, i=%ld, x=%lf", i, x);
134 return rc;
135 }
136 return rc;
137 }
138
139 static RC append(bytes &s, const string &x)
140 {
141 auto l = x.begin();
142 for (auto c = x.begin(); c < x.end(); c++) {
143 switch (byte_t(*c)) {
144 case 0x00:
145 s.insert(s.end(), l, c);
146 s.insert(s.end(), &lit00[0], &lit00[0] + 2);
147 l = c + 1;
148 break;
149 case 0xff:
150 s.insert(s.end(), l, c);
151 s.insert(s.end(), &litff[0], &litff[0] + 2);
152 l = c + 1;
153 }
154 }
155 s.insert(s.end(), l, x.end());
156 s.insert(s.end(), &term[0], &term[0] + 2);
157 return RC::SUCCESS;
158 }
159
160 static RC append(bytes &s, const trailing_string &x)
161 {
162 s.insert(s.end(), x.begin(), x.end());
163 return RC::SUCCESS;
164 }
165
166 static RC append(bytes &s, const infinity &_)
167 {
168 s.insert(s.end(), &inf[0], &inf[0] + 2);
169 return RC::SUCCESS;
170 }
171
172 static RC append(bytes &s, const string_or_infinity &x)
173 {
174 RC rc = RC::SUCCESS;
175 if (x.inf) {
176 if (!x.s.empty()) {
177 LOG_WARN("orderedcode: string_or_infinity has non-zero string and non-zero infinity");
178 return RC::INVALID_ARGUMENT;
179 }
180 if (OB_FAIL(append(s, infinity{}))) {
181 LOG_WARN("orderedcode: append infinity failed");
182 return rc;
183 }
184 } else {
185 if (OB_FAIL(append(s, x.s))) {
186 LOG_WARN("orderedcode: append string failed");
187 return rc;
188 }
189 }
190 return rc;
191 }
192
193 static RC parse(span<byte_t> &s, byte_t dir, int64_t &dst)
194 {
195 if (s.empty()) {
196 LOG_WARN("orderedcode: corrupt input");
197 return RC::INVALID_ARGUMENT;
198 }
199 byte_t c = s[0] ^ dir;
200 if (c >= 0x40 && c < 0xc0) {
201 dst = int64_t(int8_t(c ^ 0x80));
202 s = s.subspan(1);
203 return RC::SUCCESS;
204 }
205 bool neg = (c & 0x80) == 0;
206 if (neg) {
207 c = ~c;
208 dir = ~dir;
209 }
210 size_t n = 0;
211 if (c == 0xff) {
212 if (s.size() == 1) {
213 LOG_WARN("orderedcode: corrupt input");
214 return RC::INVALID_ARGUMENT;
215 }
216 s = s.subspan(1);
217 c = s[0] ^ dir;
218 if (c > 0xc0) {
219 LOG_WARN("orderedcode: corrupt input");
220 return RC::INVALID_ARGUMENT;
221 }
222 n = 7;
223 }
224 for (byte_t mask = 0x80; (c & mask) != 0; mask >>= 1) {
225 c &= ~mask;
226 n++;
227 }
228 if (s.size() < n) {
229 LOG_WARN("orderedcode: corrupt input");
230 return RC::INVALID_ARGUMENT;
231 }
232 int64_t x = c;
233 for (size_t i = 1; i < n; i++) {
234 c = s[i] ^ dir;
235 x = x << 8 | c;
236 }
237 if (neg) {
238 x = ~x;
239 }
240 dst = x;
241 s = s.subspan(n);
242 return RC::SUCCESS;
243 }
244
245 static RC parse(span<byte_t> &s, byte_t dir, uint64_t &dst)
246 {
247 RC rc = RC::SUCCESS;
248 if (s.empty()) {
249 LOG_WARN("orderedcode: corrupt input");
250 return RC::INVALID_ARGUMENT;
251 }
252 byte_t n = s[0] ^ dir;
253 if (n > 8 || (int)s.size() < (1 + n)) {
254 LOG_WARN("orderedcode: corrupt input");
255 return RC::INVALID_ARGUMENT;
256 }
257 uint64_t x = 0;
258 for (size_t i = 0; i < n; i++) {
259 x = x << 8 | (s[1 + i] ^ dir);
260 }
261 dst = x;
262 s = s.subspan(1 + n);
263 return rc;
264 }
265
266 static RC parse(span<byte_t> &s, byte_t dir, infinity &_)
267 {
268 RC rc = RC::SUCCESS;
269 if (s.size() < 2) {
270 LOG_WARN("orderedcode: corrupt input");
271 return RC::INVALID_ARGUMENT;
272 }
273 if ((s[0] ^ dir) != inf[0] || (s[1] ^ dir) != inf[1]) {
274 LOG_WARN("orderedcode: corrupt input");
275 return RC::INVALID_ARGUMENT;
276 }
277 s = s.subspan(2);
278 return rc;
279 }
280
281 static RC parse(span<byte_t> &s, byte_t dir, string &dst)
282 {
283 bytes buf;
284 for (auto l = 0, i = 0; i < (int)s.size();) {
285 switch (s[i] ^ dir) {
286 case 0x00:
287 if (i + 1 >= (int)s.size()) {
288 LOG_WARN("orderedcode: corrupt input");
289 return RC::INVALID_ARGUMENT;
290 }
291 switch (s[i + 1] ^ dir) {
292 case 0x01:
293 dst.clear();
294 if (l == 0 && dir == increasing) {
295 dst.insert(dst.end(), s.begin(), s.begin() + i);
296 s = s.subspan(i + 2);
297 return RC::SUCCESS;
298 }
299 buf.insert(buf.end(), s.begin() + l, s.begin() + i);
300 if (dir == decreasing) {
301 span<byte_t> sp(buf);
302 invert(sp);
303 }
304 dst.insert(dst.end(), buf.begin(), buf.end());
305 s = s.subspan(i + 2);
306 return RC::SUCCESS;
307 case 0xff:
308 buf.insert(buf.end(), s.begin() + l, s.begin() + i);
309 buf.insert(buf.end(), static_cast<byte_t>(0x00 ^ dir));
310 i += 2;
311 l = i;
312 break;
313 default: LOG_WARN("orderedcode: corrupt input"); return RC::INVALID_ARGUMENT;
314 }
315 break;
316 case 0xff:
317 if (i + 1 >= (int)s.size() || ((s[i + 1] ^ dir) != 0x00)) {
318 LOG_WARN("orderedcode: corrupt input");
319 return RC::INVALID_ARGUMENT;
320 }
321 buf.insert(buf.end(), s.begin() + l, s.begin() + i);
322 buf.insert(buf.end(), static_cast<byte_t>(0xff ^ dir));
323 i += 2;
324 l = i;
325 break;
326 default: i++;
327 }
328 }
329 LOG_WARN("orderedcode: corrupt input");
330 return RC::INVALID_ARGUMENT;
331 }
332
333 static RC parse(span<byte_t> &s, byte_t dir, float64_t &dst)
334 {
335 RC rc = RC::SUCCESS;
336 int64_t i = 0;
337 parse(s, dir, i);
338 if (i < 0) {
339 i = ((int64_t)-1 << 63) - i;
340 }
341 memcpy(&dst, &i, sizeof(i));
342 if (std::isnan(dst)) {
343 rc = RC::INVALID_ARGUMENT;
344 }
345 return rc;
346 }
347
348 static RC parse(span<byte_t> &s, byte_t dir, string_or_infinity &dst)
349 {
350 RC rc = RC::SUCCESS;
351 try {
352 infinity _;
353 rc = parse(s, dir, _);
354 dst.inf = true;
355 return rc;
356 } catch (...) {
357 rc = parse(s, dir, dst.s);
358 return rc;
359 }
360 }
361
362 static RC parse(span<byte_t> &s, byte_t dir, trailing_string &dst)
363 {
364 dst.clear();
365 if (dir == increasing) {
366 dst.insert(dst.end(), s.begin(), s.end());
367 } else {
368 invert(s);
369 dst.insert(dst.end(), s.begin(), s.end());
370 }
371 return RC::SUCCESS;
372 }
373};
374
375class Codec
376{
377public:
378 static RC encode_without_rid(int64_t table_id, bytes &encoded_key)
379 {
380 RC rc = RC::SUCCESS;
381 if (OB_FAIL(OrderedCode::append(encoded_key, table_prefix))) {
382 LOG_WARN("append failed");
383 } else if (OB_FAIL(OrderedCode::append(encoded_key, table_id))) {
384 LOG_WARN("append failed");
385 }
386 return rc;
387 }
388 static RC encode(int64_t table_id, uint64_t rid, bytes &encoded_key)
389 {
390 RC rc = RC::SUCCESS;
391 if (OB_FAIL(OrderedCode::append(encoded_key, table_prefix))) {
392 LOG_WARN("append failed");
393 } else if (OB_FAIL(OrderedCode::append(encoded_key, table_id))) {
394 LOG_WARN("append failed");
395 } else if (OB_FAIL(OrderedCode::append(encoded_key, rowkey_prefix))) {
396 LOG_WARN("append failed");
397 } else if (OB_FAIL(OrderedCode::append(encoded_key, rid))) {
398 LOG_WARN("append failed");
399 }
400 return rc;
401 }
402
403 static RC encode_table_prefix(int64_t table_id, bytes &encoded_key)
404 {
405 RC rc = RC::SUCCESS;
406 if (OB_FAIL(OrderedCode::append(encoded_key, table_prefix))) {
407 LOG_WARN("append failed");
408 } else if (OB_FAIL(OrderedCode::append(encoded_key, table_id))) {
409 LOG_WARN("append failed");
410 } else if (OB_FAIL(OrderedCode::append(encoded_key, rowkey_prefix))) {
411 LOG_WARN("append failed");
412 }
413 return rc;
414 }
415
416 static RC encode_value(const Value &val, bytes &dst)
417 {
418 RC rc = RC::SUCCESS;
419 switch (val.attr_type()) {
420 case AttrType::INTS:
421 if (OB_FAIL(OrderedCode::append(dst, (int64_t)val.get_int()))) {
422 LOG_WARN("append failed");
423 }
424 break;
425 case AttrType::FLOATS:
426 if (OB_FAIL(OrderedCode::append(dst, (double)val.get_float()))) {
427 LOG_WARN("append failed");
428 }
429 break;
430 case AttrType::CHARS:
431 if (OB_FAIL(OrderedCode::append(dst, val.get_string()))) {
432 LOG_WARN("append failed");
433 }
434 break;
435 default: return RC::INVALID_ARGUMENT;
436 }
437 return rc;
438 }
439
440 static RC encode_int(int64_t val, bytes &dst)
441 {
442 RC rc = RC::SUCCESS;
443 if (OB_FAIL(OrderedCode::append(dst, val))) {
444 LOG_WARN("append failed");
445 }
446 return rc;
447 }
448
449 static RC decode(bytes &encoded_key, int64_t &table_id)
450 {
451 RC rc = RC::SUCCESS;
452 span<byte_t> sp(encoded_key);
453 string table_prefix;
454 if (OB_FAIL(OrderedCode::parse(sp, OrderedCode::increasing, table_prefix))) {
455 LOG_WARN("parse failed");
456 return rc;
457 } else if (OB_FAIL(OrderedCode::parse(sp, OrderedCode::increasing, table_id))) {
458 LOG_WARN("parse failed");
459 return rc;
460 }
461 return rc;
462 }
463
464 static constexpr const char *table_prefix = "t";
465 static constexpr const char *rowkey_prefix = "r";
466};
467
468// template<typename T>
469// void append(bytes& s, decr<T> d) {
470// size_t n = s.size();
471// append(s, d.val);
472// span<byte_t> sp(&s[n], s.size() - n);
473// invert(sp);
474// }
475
476// template<typename It, typename... Its>
477// void append(bytes& s, It it, Its... its) {
478// append(s, it);
479// append(s, its...);
480// }
481
482// template<typename T>
483// RC parse(span<byte_t>& s, decr<T>& dst) {
484// return parse(s, decreasing, dst.val);
485// }
486
487// template<typename It>
488// RC parse(span<byte_t>& s, It& it) {
489// return parse(s, increasing, it);
490// }
491
492// template<typename It, typename... Its>
493// RC parse(span<byte_t>& s, It& it, Its&... its) {
494// RC rc = RC::SUCCESS;
495// if (OB_FAIL(parse(s, it))) {
496// LOG_WARN("parse failed");
497// return rc;
498// }
499// if (OB_FAIL(parse(s, its...))) {
500// LOG_WARN("parse failed");
501// return rc;
502// }
503// return rc;
504// }
Definition: codec.h:376
Definition: codec.h:28
属性的值
Definition: value.h:30
int get_int() const
Definition: value.cpp:234
Definition: codec.h:48
Definition: codec.h:40
Definition: codec.h:57
Definition: codec.h:63