BitMagic-C++
bmsparsevec_serial.h
Go to the documentation of this file.
1#ifndef BMSPARSEVEC_SERIAL__H__INCLUDED__
2#define BMSPARSEVEC_SERIAL__H__INCLUDED__
3/*
4Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5
6Licensed under the Apache License, Version 2.0 (the "License");
7you may not use this file except in compliance with the License.
8You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12Unless required by applicable law or agreed to in writing, software
13distributed under the License is distributed on an "AS IS" BASIS,
14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15See the License for the specific language governing permissions and
16limitations under the License.
17
18For more information please visit: http://bitmagic.io
19*/
20
21/*! \file bmsparsevec_serial.h
22 \brief Serialization for sparse_vector<>
23*/
24
25
26#ifndef BM__H__INCLUDED__
27// BitMagic utility headers do not include main "bm.h" declaration
28// #include "bm.h" or "bm64.h" explicitly
29# error missing include (bm.h or bm64.h)
30#endif
31
32#include "bmsparsevec.h"
33#include "bmserial.h"
34#include "bmbuffer.h"
35#include "bmdef.h"
36
37namespace bm
38{
39
40/** \defgroup svserial Sparse vector serialization
41 Sparse vector serialization
42 \ingroup svector
43 */
44
45
46/*!
47 \brief layout class for serialization buffer structure
48
49 Class keeps a memory block sized for the target sparse vector BLOB.
50 This class also provides access to bit-plane memory, so it becomes possible
51 to use parallel storage methods to save bit-plains into
52 different storage shards.
53
54 \ingroup svserial
55*/
56template<class SV>
58{
59 typedef typename SV::value_type value_type;
60 typedef typename SV::bvector_type bvector_type;
62
64
66
67 /*!
68 \brief resize capacity
69 \param capacity - new capacity (0 releases the buffer memory)
70 \return buffer pointer after re-initialization, or 0 when capacity is 0
71 */
72 unsigned char* reserve(size_t capacity)
73 {
74 if (capacity == 0)
75 {
76 freemem();
77 return 0;
78 }
79 buf_.reinit(capacity);
80 return buf_.data();
81 }
82
83 /// return current serialized size
84 size_t size() const { return buf_.size(); }
85
86 /// Set new serialized size
87 void resize(size_t ssize) { buf_.resize(ssize); }
88
89 /// return serialization buffer capacity
90 size_t capacity() const { return buf_.capacity(); }
91
92 /// free memory
93 void freemem() { buf_.release(); }
94
95 /// Set plain output pointer and size
96 /// (i indexes the SV::sv_plains-sized arrays directly, no range check is made here)
96 void set_plain(unsigned i, unsigned char* ptr, size_t buf_size)
97 {
98 plain_ptrs_[i] = ptr;
99 plane_size_[i] = buf_size;
100 }
101
102 /// Get plain pointer (as stored by set_plain(), no range check on i)
103 const unsigned char* get_plain(unsigned i) const { return plain_ptrs_[i]; }
104
105 /// Return serialization buffer pointer
106 const unsigned char* buf() const { return buf_.buf(); }
107 /// Return serialization buffer pointer (same pointer as buf())
108 const unsigned char* data() const { return buf_.buf(); }
109
110private:
112 void operator=(const sparse_vector_serial_layout&);
113protected:
114 buffer_type buf_; ///< serialization buffer
115 unsigned char* plain_ptrs_[SV::sv_plains]; ///< pointers on serialized bit-plains
116 size_t plane_size_[SV::sv_plains]; ///< serialized plain size
117};
118
119// -------------------------------------------------------------------------
120
121/*!
122 \brief Serialize sparse vector into a memory buffer(s) structure
123
124 Serialization format:
125
126 | HEADER | BIT-VECTORS ... | REMAP_MATRIX
127
128 Header structure:
129 -----------------
130 BYTE+BYTE: Magic-signature 'BM' or 'BC' (c-compressed)
131 BYTE : Byte order ( 0 - Big Endian, 1 - Little Endian)
132 {
133 BYTE : Number of Bit-vector plains (total) (non-zero when < 255 plains)
134 |
135 BYTE: zero - flag of large plain matrix
136 INT64: Number of bit-vector plains
137 }
138 INT64: Vector size
139 INT64: Offset of plain 0 from the header start (value 0 means plain is empty)
140 INT64: Offset of plain 1 from the header start
141 ...
142 INT32: reserved
143
144Bit-vectors:
145------------
146 Based on current bit-vector serialization
147
148Remap Matrix:
149 SubHeader | Matrix BLOB
150
151 sub-header:
152 BYTE: 'R' (remapping) or 'N' (no remapping)
153 N - means no other info is saved on the stream
154 INT64: remap matrix size
155
156 \ingroup svector
157 \ingroup svserial
158*/
159template<typename SV>
161{
162public:
163 typedef typename SV::bvector_type bvector_type;
166 typedef typename SV::value_type value_type;
167 typedef typename SV::size_type size_type;
170
171public:
173
174 /**
175 Add skip-markers for faster range deserialization
176
177 @param enable - TRUE: serialization will add bookmark codes
178 @param bm_interval - bookmark interval (in number of blocks)
179 (suggested between 4 and 512)
180 smaller interval means more bookmarks added to the skip list, thus
181 increasing the BLOB size
182 */
183 void set_bookmarks(bool enable, unsigned bm_interval = 256)
184 { bvs_.set_bookmarks(enable, bm_interval); }
185
186 /// Turn ON and OFF XOR compression of sparse vectors
187 void set_xor_ref(bool is_enabled) { is_xor_ref_ = is_enabled; }
188
189 /// Get XOR reference compression status (enabled/disabled)
190 bool is_xor_ref() const { return is_xor_ref_; }
191
192 /*!
193 \brief Serialize sparse vector into a memory buffer(s) structure
194
195 \param sv - sparse vector to serialize
196 \param sv_layout - buffer structure to keep the result
197 as defined in bm::serialization_flags
198 */
199 void serialize(const SV& sv,
201
202protected:
203 typedef typename
205
 /// Reset the XOR reference vector list and rebuild it from the bit-matrix of sv
206 void build_xor_ref_vector(const SV& sv);
207
208private:
 /// assignment is disabled
210 sparse_vector_serializer& operator=(const sparse_vector_serializer&) = delete;
211protected:
213
216};
217
218/**
219 sparse vector de-serializer
220
221*/
222template<typename SV>
224{
225public:
226 typedef typename SV::bvector_type bvector_type;
229 typedef typename SV::value_type value_type;
230 typedef typename SV::size_type size_type;
232
233public:
236
237 /*!
238 Deserialize sparse vector (whole vector, no range or mask restriction)
239
240 @param sv - [out] target sparse vector to populate
241 @param buf - source memory pointer
242 */
243 void deserialize(SV& sv, const unsigned char* buf)
244 { idx_range_set_ = false; deserialize_sv(sv, buf, 0); }
245
246 /*!
247 Deserialize sparse vector for the range [from, to]
248
249 @param sv - [out] target sparse vector to populate
250 @param buf - source memory pointer
251 @param from - start vector index for deserialization range
252 @param to - end vector index for deserialization range
253 */
254 void deserialize_range(SV& sv, const unsigned char* buf,
255 size_type from, size_type to);
256
 /// Range overload, forwards to deserialize_range()
257 void deserialize(SV& sv, const unsigned char* buf,
258 size_type from, size_type to)
259 {
260 deserialize_range(sv, buf, from, to);
261 }
262
263
264 /*!
265 Deserialize sparse vector using address mask vector
266 Address mask defines (by set bits) which vector elements to be extracted
267 from the compressed BLOB
268
269 @param sv - [out] target sparse vector to populate
270 @param buf - source memory pointer
271 @param mask_bv - AND mask bit-vector (address gather vector)
272 */
273 void deserialize(SV& sv,
274 const unsigned char* buf,
275 const bvector_type& mask_bv)
276 { idx_range_set_ = false; deserialize_sv(sv, buf, &mask_bv); }
277
278
279protected:
282
283
284 /// Deserialize header/version and other common info
285 ///
286 /// @return number of bit-plains
287 ///
288 unsigned load_header(bm::decoder& dec, SV& sv, unsigned char& matr_s_ser);
289
 /// Common implementation behind the non-range deserialize() overloads
 /// (mask_bv may be 0 for "no mask")
290 void deserialize_sv(SV& sv, const unsigned char* buf,
291 const bvector_type* mask_bv);
292
293
294 /// deserialize bit-vector plains
295 void deserialize_plains(SV& sv, unsigned plains,
296 const unsigned char* buf,
297 const bvector_type* mask_bv = 0);
298
299 /// load offset table
300 void load_plains_off_table(bm::decoder& dec, unsigned plains);
301
302 /// load NULL bit-plain (returns new plains count)
303 int load_null_plain(SV& sv,
304 int plains,
305 const unsigned char* buf,
306 const bvector_type* mask_bv);
307
308 /// load string remap dict
309 static void load_remap(SV& sv, const unsigned char* remap_buf_ptr);
310
311 /// throw error on incorrect deserialization (bad magic signature)
312 static void raise_invalid_header();
313 /// throw error on incorrect deserialization (64-bit BLOB, non-64-bit build)
314 static void raise_invalid_64bit();
315 /// throw error on incorrect deserialization (plains count does not fit the target)
316 static void raise_invalid_bitdepth();
317
318private:
 /// assignment is disabled
320 sparse_vector_deserializer& operator=(const sparse_vector_deserializer&) = delete;
321protected:
322 const unsigned char* remap_buf_ptr_; ///< position right after the last decoded plain, where the remap sub-header is read from
331 bm::heap_vector<size_t, alloc_type, true> off_vect_; ///< plain offsets loaded from the header (0 - empty plain)
332
334
338};
339
340
341
342/*!
343 \brief Serialize sparse vector into a memory buffer(s) structure
344
345 \param sv - sparse vector to serialize
346 \param sv_layout - buffer structure to keep the result
347 \param temp_block - temporary buffer
348 (allocate with BM_DECLARE_TEMP_BLOCK(x) for speed)
349
350 \ingroup svserial
351
352 @sa serialization_flags
353 @sa sparse_vector_deserializer
354*/
355template<class SV>
357 const SV& sv,
359 bm::word_t* temp_block = 0)
360{
361 (void)temp_block; // not referenced in the visible body; the cast silences unused-parameter warnings
363 sv_serializer.serialize(sv, sv_layout);
364}
365
366// -------------------------------------------------------------------------
367
368/*!
369 \brief Deserialize sparse vector
370 \param sv - target sparse vector
371 \param buf - source memory buffer
372 \param temp_block - temporary block buffer to avoid re-allocations
373
374 \return 0 (error processing via std::logic_error)
375
376 \ingroup svserial
377 @sa sparse_vector_deserializer
378*/
379template<class SV>
381 const unsigned char* buf,
382 bm::word_t* temp_block=0)
383{
384 (void)temp_block; // not referenced in the visible body; the cast silences unused-parameter warnings
386 sv_deserializer.deserialize(sv, buf);
387 return 0; // always 0 here
388}
389
390// -------------------------------------------------------------------------
391
392/**
393 Serializer for compressed collections
394*/
395template<class CBC>
397{
398public:
400 typedef typename CBC::bvector_type bvector_type;
401 typedef typename CBC::buffer_type buffer_type;
402 typedef typename CBC::statistics statistics_type;
403 typedef typename CBC::address_resolver_type address_resolver_type;
404
405public:
 /**
 Serialize the collection into a memory buffer

 @param buffer_coll - source collection
 @param buf - [out] target buffer, resized to the number of bytes written
 @param temp_block - optional temporary block passed to the
 address bit-vector serializer
 */
406 void serialize(const CBC& buffer_coll,
407 buffer_type& buf,
408 bm::word_t* temp_block = 0);
409};
410
411/**
412 Deserializer for compressed collections
413*/
414template<class CBC>
416{
417public:
419 typedef typename CBC::bvector_type bvector_type;
420 typedef typename CBC::buffer_type buffer_type;
421 typedef typename CBC::statistics statistics_type;
422 typedef typename CBC::address_resolver_type address_resolver_type;
423 typedef typename CBC::container_type container_type;
424
425public:
 /**
 Deserialize the collection from a memory buffer

 @param buffer_coll - [out] target collection to populate
 @param buf - source memory pointer
 @param temp_block - optional temporary block passed to the
 address bit-vector deserialization

 @return 0 on success, negative code on a format error
 (-1 magic signature, -2 buffer count mismatch)
 */
426 int deserialize(CBC& buffer_coll,
427 const unsigned char* buf,
428 bm::word_t* temp_block=0);
429};
430
431
432// -------------------------------------------------------------------------
433
434/**
435 \brief Serialize compressed collection into memory buffer
436
437Serialization format:
438
439
440<pre>
441 | MAGIC_HEADER | ADDRESS_BITVECTOR | LIST_OF_BUFFER_SIZES | BUFFER(s)
442
443 MAGIC_HEADER:
444 BYTE+BYTE: Magic-signature 'BM' or 'BC'
445 BYTE : Byte order ( 0 - Big Endian, 1 - Little Endian)
446
447 ADDRESS_BITVECTOR:
448 INT64: address bit-vector size
449 [memblock]: serialized address bit-vector
450
451 LIST_OF_BUFFER_SIZES:
452 INT64 - buffer sizes count
453 INT64 - buffer size 0
454 INT64 - buffer size 1
455 ...
456
457 BUFFERS:
458 [memblock]: block0
459 [memblock]: block1
460 ...
461
462</pre>
463*/
464
465template<class CBC>
467 buffer_type& buf,
468 bm::word_t* temp_block)
469{
471 buffer_coll.calc_stat(&st);
472
 // size the output for the worst case reported by calc_stat()
473 buf.resize(st.max_serialize_mem);
474
475 // write cursor, starts at the beginning of the output buffer
476 unsigned char* buf_ptr = buf.data();
477
478 bm::encoder enc(buf.data(), buf.capacity());
 // magic signature 'BC' followed by the byte-order byte
480 enc.put_8('B');
481 enc.put_8('C');
482 enc.put_8((unsigned char)bo);
483
484 unsigned char* mbuf1 = enc.get_pos(); // bookmark position
485 enc.put_64(0); // address vector size (reservation)
486
487 buf_ptr = enc.get_pos();
488
489 const address_resolver_type& addr_res = buffer_coll.resolver();
490 const bvector_type& bv = addr_res.get_bvector();
491 {
492 bm::serializer<bvector_type > bvs(temp_block);
493 bvs.gap_length_serialization(false);
494
 // NOTE(review): buf.size() is passed as the available size although
 // buf_ptr is already past the header bytes - confirm this is intended
495 size_t addr_bv_size = bvs.serialize(bv, buf_ptr, buf.size());
496 buf_ptr += addr_bv_size;
497
498 enc.set_pos(mbuf1); // rewind to bookmark
499 enc.put_64(addr_bv_size); // save the address vector size
500 }
501 enc.set_pos(buf_ptr); // restore stream position
502 size_t coll_size = buffer_coll.size();
503
504 enc.put_64(coll_size);
505
506 // pass 1 (save buffer sizes, each as a 64-bit value)
507 {
508 for (unsigned i = 0; i < buffer_coll.size(); ++i)
509 {
510 const buffer_type& cbuf = buffer_coll.get(i);
511 size_t sz = cbuf.size();
512 enc.put_64(sz);
513 } // for i
514 }
515 // pass 2 (save buffers)
516 {
517 for (unsigned i = 0; i < buffer_coll.size(); ++i)
518 {
519 const buffer_type& cbuf = buffer_coll.get(i);
520 size_t sz = cbuf.size();
521 enc.memcpy(cbuf.buf(), sz);
522 } // for i
523 }
 // shrink to the number of bytes actually encoded
524 buf.resize(enc.size());
525}
526
527// -------------------------------------------------------------------------
/**
 \brief Deserialize compressed collection from a memory buffer

 Reads the 'BC' magic header, restores the address bit-vector, then the
 table of 64-bit buffer sizes followed by the buffers themselves.

 \param buffer_coll - [out] target collection to populate
 \param buf - source memory pointer
 \param temp_block - temporary block passed to the address bit-vector
 deserialization

 \return 0 on success,
 -1 when the magic signature is not 'BC',
 -2 when the stored buffer count does not match the number of
 set bits in the address bit-vector
*/
528template<class CBC>
530 CBC& buffer_coll,
531 const unsigned char* buf,
532 bm::word_t* temp_block)
533{
534 // TODO: implement correct processing of byte-order correct deserialization
535 // ByteOrder bo_current = globals<true>::byte_order();
536
537 bm::decoder dec(buf);
538 unsigned char h1 = dec.get_8();
539 unsigned char h2 = dec.get_8();
540
541 BM_ASSERT(h1 == 'B' && h2 == 'C');
 // reject when EITHER signature byte is wrong (the previous '&&' form
 // accepted any header with one matching byte)
542 if (h1 != 'B' || h2 != 'C') // no magic header? issue...
543 {
544 return -1;
545 }
546 //unsigned char bv_bo =
547 dec.get_8(); // byte-order byte is consumed but not interpreted (see TODO above)
548
549 // -----------------------------------------
550 // restore address resolver
551 //
552 bm::id64_t addr_bv_size = dec.get_64();
553
554 const unsigned char* bv_buf_ptr = dec.get_pos();
555
556 address_resolver_type& addr_res = buffer_coll.resolver();
557 bvector_type& bv = addr_res.get_bvector();
558 bv.clear();
559
560 bm::deserialize(bv, bv_buf_ptr, temp_block);
561 addr_res.sync();
562
563 typename bvector_type::size_type addr_cnt = bv.count();
 // skip over the serialized address bit-vector using its stored size
564 dec.seek((int)addr_bv_size);
565
566 // -----------------------------------------
567 // read buffer sizes
568 //
569 bm::id64_t coll_size = dec.get_64();
570 if (coll_size != addr_cnt)
571 {
572 return -2; // buffer size collection does not match address vector
573 }
574
575 typedef std::vector<unsigned>::size_type vect_size_type;
576 std::vector<bm::id64_t> buf_size_vec;
577 buf_size_vec.resize(vect_size_type(coll_size));
578 {
579 for (unsigned i = 0; i < coll_size; ++i)
580 {
581 bm::id64_t sz = dec.get_64();
582 buf_size_vec[i] = sz;
583 } // for i
584 }
585
 // read the buffers, in the same order as the size table
586 {
587 container_type& buf_vect = buffer_coll.container();
588 buf_vect.resize(vect_size_type(coll_size));
589 for (unsigned i = 0; i < coll_size; ++i)
590 {
591 bm::id64_t sz = buf_size_vec[i];
592 buffer_type& b = buf_vect.at(i);
593 b.resize(sz);
594 dec.memcpy(b.data(), sz);
595 } // for i
596 }
597 buffer_coll.sync();
598 return 0;
599}
600
601// -------------------------------------------------------------------------
602//
603// -------------------------------------------------------------------------
604
/// Initial serializer setup: GAP length serialization is turned off and the
/// XOR reference compression default follows the BMXORCOMP compile-time switch
605template<typename SV>
607{
608 bvs_.gap_length_serialization(false);
609 #ifdef BMXORCOMP
610 is_xor_ref_ = true;
611 #else
612 is_xor_ref_ = false;
613 #endif
614}
615
616// -------------------------------------------------------------------------
617
/// Drop the current content of the reference vector list (bv_ref_) and
/// rebuild it from the bit-matrix of sv
618template<typename SV>
620{
621 bv_ref_.reset();
622 bv_ref_.build(sv.get_bmatrix());
623}
624
625// -------------------------------------------------------------------------
626
/// Serialize sv into sv_layout.
/// Bit-plains are written first, after a gap reserved for the header, then the
/// optional remap matrix; the header with the plain offset table is
/// written last, at the start of the buffer.
627template<typename SV>
630{
631 typename SV::statistics sv_stat;
632 sv.calc_stat(&sv_stat);
633 unsigned char* buf = sv_layout.reserve(sv_stat.max_serialize_mem);
634
 // NOTE(review): capacity (size_t) is narrowed to unsigned here - confirm
 // this is safe for BLOBs over 4GB
635 bm::encoder enc(buf, (unsigned)sv_layout.capacity());
636 unsigned plains = sv.stored_plains();
637
638 // header size in bytes
639 unsigned h_size = 1 + 1 + // "BM" or "BC" (magic header)
640 1 + // byte-order
641 1 + // number of bit-plains (for vector)
642 8 + // size (internal 64-bit)
643 (8 * plains) + // offsets of all plains
644 4; // reserve
645 // for large plain matrices
646 {
647 h_size += 1 + // version number
648 8; // number of plains (64-bit)
649 }
650
651 // ---------------------------------
652 // Setup XOR reference compression
653 //
654 if (is_xor_ref())
655 {
656 build_xor_ref_vector(sv);
657 bvs_.set_ref_vectors(&bv_ref_);
658 }
659
660 // -----------------------------------------------------
661 // Serialize all bvector plains
662 //
663
664 unsigned char* buf_ptr = buf + h_size; // ptr where plains start (start+hdr)
665
666 unsigned i;
667 for (i = 0; i < plains; ++i)
668 {
669 typename SV::bvector_type_const_ptr bv = sv.get_plain(i);
670 if (!bv) // empty plain
671 {
672 sv_layout.set_plain(i, 0, 0);
673 continue;
674 }
675 if (is_xor_ref())
676 {
677 unsigned idx = (unsigned)bv_ref_.find(i);
678 BM_ASSERT(idx != bv_ref_.not_found());
679 bvs_.set_curr_ref_idx(idx);
680 }
681
682 size_t buf_size =
683 bvs_.serialize(*bv, buf_ptr, sv_stat.max_serialize_mem);
684
685 sv_layout.set_plain(i, buf_ptr, buf_size);
686 buf_ptr += buf_size;
 // max_serialize_mem is reused as the remaining byte budget
687 if (sv_stat.max_serialize_mem > buf_size)
688 {
689 sv_stat.max_serialize_mem -= buf_size;
690 continue;
691 }
 // reached only when this plain used up the whole remaining budget
692 BM_ASSERT(0); // TODO: throw an exception here
693 } // for i
694
695 bvs_.set_ref_vectors(0); // disengage XOR ref vector
696
697 // -----------------------------------------------------
698 // serialize the re-map matrix
699 //
700 if (bm::conditional<SV::is_remap_support::value>::test()) // test remapping trait
701 {
702 bm::encoder enc_m(buf_ptr, sv_stat.max_serialize_mem);
703 if (sv.is_remap())
704 {
705 bm::id64_t remap_size = sv.remap_size();
706 const unsigned char* matrix_buf = sv.get_remap_buffer();
707 BM_ASSERT(matrix_buf);
708 BM_ASSERT(remap_size);
709
710 enc_m.put_8('R');
711 enc_m.put_64(remap_size);
712 enc_m.memcpy(matrix_buf, remap_size);
713 enc_m.put_8('E'); // end of matrix (integrity check token)
714 }
715 else
716 {
717 enc_m.put_8('N');
718 }
719 buf_ptr += enc_m.size(); // add matrix encoded data size
720 }
721
722 sv_layout.resize(size_t(buf_ptr - buf)); // set the true occupied size
723
724 // -----------------------------------------------------
725 // save the header
726 //
728
729 enc.put_8('B'); // magic header 'BM' - bit matrix 'BC' - bit compressed
730 if (sv.is_compressed())
731 enc.put_8('C');
732 else
733 enc.put_8('M');
734
735 enc.put_8((unsigned char)bo); // byte order
736
 // matrix serialization version: 1 by default, 2 when built with BM64ADDR
737 unsigned char matr_s_ser = 1;
738#ifdef BM64ADDR
739 matr_s_ser = 2;
740#endif
741
742 enc.put_8(0); // number of plains == 0 (legacy magic number)
743 enc.put_8(matr_s_ser); // matrix serialization version
744 enc.put_64(plains); // number of rows in the bit-matrix
745 enc.put_64(sv.size_internal());
746
747 // save the offset table (part of the header)
748 // offsets are relative to the buffer start, 0 marks an empty plain
749 for (i = 0; i < plains; ++i)
750 {
751 const unsigned char* p = sv_layout.get_plain(i);
752 if (!p)
753 {
754 enc.put_64(0);
755 continue;
756 }
757 size_t offset = size_t(p - buf);
758 enc.put_64(offset);
759 } // for
760}
761
762// -------------------------------------------------------------------------
763//
764// -------------------------------------------------------------------------
765
/// Setup: allocates the temporary bit block, attaches the allocator pool to
/// the two internal mask vectors and clears the range flag
766template<typename SV>
768 : remap_buf_ptr_(0)
769{
770 temp_block_ = alloc_.alloc_bit_block();
771 not_null_mask_bv_.set_allocator_pool(&pool_);
772 rsc_mask_bv_.set_allocator_pool(&pool_);
773 idx_range_set_ = false;
774}
775
776// -------------------------------------------------------------------------
777
/// Returns the temporary bit block (if any) to the allocator
778template<typename SV>
780{
781 if (temp_block_)
782 alloc_.free_bit_block(temp_block_);
783}
784
785// -------------------------------------------------------------------------
786
/// Range deserialization: loads the header, the offset table and the NULL
/// plain, then decodes the remaining plains restricted to the index range
/// and finally the remap matrix. The range flag is cleared again on exit.
787template<typename SV>
789 const unsigned char* buf,
790 size_type from, size_type to)
791{
792 idx_range_set_ = true; idx_range_from_ = from; idx_range_to_ = to;
793
794 remap_buf_ptr_ = 0;
795 bm::decoder dec(buf); // TODO: implement correct processing of byte-order
796
797 unsigned char matr_s_ser = 0;
798 unsigned plains = load_header(dec, sv, matr_s_ser);
799
800 sv.clear();
801
802 bm::id64_t sv_size = dec.get_64();
803 if (sv_size == 0)
804 return; // empty vector
805
806 sv.resize_internal(size_type(sv_size));
807 bv_ref_.reset();
808
809 load_plains_off_table(dec, plains); // read the offset vector of bit-plains
810
811 // TODO: add range for not NULL plane
812 plains = (unsigned)load_null_plain(sv, int(plains), buf, 0);
813
814 // check if mask needs to be recalculated using the NULL (index) vector
816 {
817 // recalculate plains range
818 size_type sv_left, sv_right;
819 bool range_valid = sv.resolve_range(from, to, &sv_left, &sv_right);
820 if (!range_valid)
821 {
 // nothing resolves inside [from, to]: return an empty vector
822 sv.clear();
823 idx_range_set_ = false;
824 return;
825 }
826 else
827 {
828 idx_range_set_ = true; idx_range_from_ = sv_left; idx_range_to_ = sv_right;
829 }
830 }
831
832 deserialize_plains(sv, plains, buf, 0);
833
 // detach and drop the XOR reference vectors used during decode
834 op_deserial_.set_ref_vectors(0);
835 deserial_.set_ref_vectors(0);
836 bv_ref_.reset();
837
838
839 // load the remap matrix
840 //
842 {
843 if (matr_s_ser)
844 load_remap(sv, remap_buf_ptr_);
845 } // if remap traits
846
847 sv.sync(true); // force sync, recalculate RS index, remap tables, etc
848 remap_buf_ptr_ = 0;
849
850 idx_range_set_ = false;
851}
852
853// -------------------------------------------------------------------------
854
/// Full (optionally masked) deserialization: loads the header, the offset
/// table and the NULL plain, then decodes the remaining plains (AND-ed with
/// the mask when mask_bv is not 0) and finally the remap matrix.
855template<typename SV>
857 const unsigned char* buf,
858 const bvector_type* mask_bv)
859{
860 remap_buf_ptr_ = 0;
861 bm::decoder dec(buf); // TODO: implement correct processing of byte-order
862
863 unsigned char matr_s_ser = 0;
864 unsigned plains = load_header(dec, sv, matr_s_ser);
865
866 sv.clear();
867
868 bm::id64_t sv_size = dec.get_64();
869 if (sv_size == 0)
870 return; // empty vector
871
872 sv.resize_internal(size_type(sv_size));
873 bv_ref_.reset();
874
875 load_plains_off_table(dec, plains); // read the offset vector of bit-plains
876
877 plains = (unsigned)load_null_plain(sv, int(plains), buf, mask_bv);
878
879 // check if mask needs to be recalculated using the NULL (index) vector
881 {
882 if (mask_bv)
883 {
884 const bvector_type* bv_null = sv.get_null_bvector();
885 BM_ASSERT(bv_null);
886 rsc_mask_bv_.clear(true);
887 not_null_mask_bv_.bit_and(*bv_null, *mask_bv, bvector_type::opt_compress);
888 rsc_compressor_.compress(rsc_mask_bv_, *bv_null, not_null_mask_bv_);
 // from here on the rank-compressed mask replaces the caller's mask
889 mask_bv = &rsc_mask_bv_;
890
891 // if it needs range recalculation
892 if (idx_range_set_) // range setting is in effect
893 {
 // result of find_range() is not checked
894 //bool rf =
895 rsc_mask_bv_.find_range(idx_range_from_, idx_range_to_);
896 }
897 }
898 }
899
900 deserialize_plains(sv, plains, buf, mask_bv);
901
 // detach and drop the XOR reference vectors used during decode
902 op_deserial_.set_ref_vectors(0);
903 deserial_.set_ref_vectors(0);
904 bv_ref_.reset();
905
906
907 // load the remap matrix
908 //
910 {
911 if (matr_s_ser)
912 load_remap(sv, remap_buf_ptr_);
913 } // if remap traits
914
915 sv.sync(true); // force sync, recalculate RS index, remap tables, etc
916 remap_buf_ptr_ = 0;
917}
918
919// -------------------------------------------------------------------------
920
/// Read and validate the BLOB header.
///
/// @param dec - decoder positioned at the start of the BLOB
/// @param sv - target vector, used to check the plains count
/// @param matr_s_ser - [out] matrix serialization version; assigned only when
/// the plains byte is 0 (bit-matrix format), otherwise left
/// as passed in
/// @return number of bit-plains
///
/// Invalid signature, 64-bit BLOB in a non-64-bit build, or a plains count of
/// 0 or above sv.stored_plains() are reported via the raise_invalid_*() helpers
921template<typename SV>
923 bm::decoder& dec, SV& sv, unsigned char& matr_s_ser)
924{
925 unsigned char h1 = dec.get_8();
926 unsigned char h2 = dec.get_8();
927
928 BM_ASSERT(h1 == 'B' && (h2 == 'M' || h2 == 'C'));
929
930 bool sig2_ok = (h2 == 'M' || h2 == 'C');
931 if (h1 != 'B' || !sig2_ok) // no magic header?
932 raise_invalid_header();
933
934 unsigned char bv_bo = dec.get_8(); (void) bv_bo;
935 unsigned plains = dec.get_8();
936 if (plains == 0) // bit-matrix
937 {
938 matr_s_ser = dec.get_8(); // matrix serialization version
939 plains = (unsigned) dec.get_64(); // number of rows in the bit-matrix
940 }
 // version 2 is what the serializer writes when built with BM64ADDR;
 // it is rejected when this code is built without BM64ADDR
941 #ifdef BM64ADDR
942 #else
943 if (matr_s_ser == 2) // 64-bit matrix
944 {
945 raise_invalid_64bit();
946 }
947 #endif
948
949 unsigned sv_plains = sv.stored_plains();
950 if (!plains || plains > sv_plains)
951 raise_invalid_bitdepth();
952 return plains;
953}
954
955// -------------------------------------------------------------------------
956
/// Decode the bit-plains listed in the offset table (off_vect_), from the
/// highest index down to 0.
///
/// @param sv - target vector receiving the plains
/// @param plains - number of plains to process
/// @param buf - start of the serialized BLOB (offsets are relative to it)
/// @param mask_bv - optional AND mask; when set every decoded plain is
/// AND-ed with it
///
/// The first plain decoded while remap_buf_ptr_ is still 0 is decoded in
/// full and used to set remap_buf_ptr_ to the byte right after its data.
957template<typename SV>
959 SV& sv,
960 unsigned plains,
961 const unsigned char* buf,
962 const bvector_type* mask_bv)
963{
 // derive the index range from the mask when no explicit range is active
964 if (mask_bv && !idx_range_set_)
965 idx_range_set_ = mask_bv->find_range(idx_range_from_, idx_range_to_);
966
967 op_deserial_.set_ref_vectors(&bv_ref_);
968 deserial_.set_ref_vectors(&bv_ref_);
969
970 // read-deserialize the plains based on offsets
971 // backward order to bring the NULL vector first
972 //
973 for (int i = int(plains-1); i >= 0; --i)
974 {
975 size_t offset = off_vect_[unsigned(i)];
976 if (!offset) // empty vector
977 continue;
978 const unsigned char* bv_buf_ptr = buf + offset; // seek to position
979 bvector_type* bv = sv.get_plain(unsigned(i));
980 BM_ASSERT(bv);
981
 // register the plain as a reference vector for later plains
982 bv_ref_.add(bv, unsigned(i));
983
984 if (mask_bv) // gather mask set, use AND operation deserializer
985 {
986 typename bvector_type::mem_pool_guard mp_g_z(pool_, *bv);
987
989 && !remap_buf_ptr_) // last plain vector (special case)
990 {
991 size_t read_bytes =
992 deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
993 remap_buf_ptr_ = bv_buf_ptr + read_bytes;
994 bv->bit_and(*mask_bv, bvector_type::opt_compress);
995 continue;
996 }
997 if (idx_range_set_)
998 deserial_.set_range(idx_range_from_, idx_range_to_);
999 deserial_.deserialize(*bv, bv_buf_ptr);
1000 bv->bit_and(*mask_bv, bvector_type::opt_compress);
1001 }
1002 else
1003 {
1005 !remap_buf_ptr_)
1006 {
1007 size_t read_bytes =
1008 deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1009 remap_buf_ptr_ = bv_buf_ptr + read_bytes;
1010 if (idx_range_set_)
1011 bv->keep_range(idx_range_from_, idx_range_to_);
1012 continue;
1013 }
1014 if (idx_range_set_)
1015 {
1016 deserial_.set_range(idx_range_from_, idx_range_to_);
1017 deserial_.deserialize(*bv, bv_buf_ptr);
1018 bv->keep_range(idx_range_from_, idx_range_to_);
1019 }
1020 else
1021 {
1022 //size_t read_bytes =
1023 deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1024 }
1025 }
1026
1027 } // for i
1028 deserial_.unset_range();
1029
1030}
1031
1032// -------------------------------------------------------------------------
1033
/// Load the NULL bit-plain, taken from the last entry of the offset table.
///
/// @param sv - target vector
/// @param plains - total number of plains read from the header (must be > 0)
/// @param buf - start of the serialized BLOB (offsets are relative to it)
/// @param mask_bv - optional AND mask applied on the non-compressed path
///
/// @return plains unchanged when sv is not nullable, otherwise plains-1
/// (the NULL plain is excluded from the count, even if it was empty)
1034template<typename SV>
1036 int plains,
1037 const unsigned char* buf,
1038 const bvector_type* mask_bv)
1039{
1040 BM_ASSERT(plains > 0);
1041 if (!sv.is_nullable())
1042 return plains;
1043 int i = plains - 1;
1044 size_t offset = off_vect_[unsigned(i)];
1045 if (offset)
1046 {
1047 // TODO: improve serialization format to avoid non-range decode of
1048 // the NULL vector just to get to the offset of remap table
1049
1050 const unsigned char* bv_buf_ptr = buf + offset; // seek to position
1051 bvector_type* bv = sv.get_plain(unsigned(i));
1052 bv_ref_.add(bv, unsigned(i));
1054 {
1055 // load the whole not-NULL vector regardless of range
1056 // TODO: load [0, idx_range_to_]
1057 size_t read_bytes = deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1058 remap_buf_ptr_ = bv_buf_ptr + read_bytes;
1059 }
1060 else // non-compressed SV
1061 {
1062 // NULL plain in string vector with substitute re-map
1063 //
1065 {
1066 BM_ASSERT(!remap_buf_ptr_);
1067 size_t read_bytes = deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1068 remap_buf_ptr_ = bv_buf_ptr + read_bytes;
1069 if (idx_range_set_)
1070 bv->keep_range(idx_range_from_, idx_range_to_);
1071 }
1072 else
1073 if (idx_range_set_)
1074 {
 // range decode, then trim bits outside the range
1075 deserial_.set_range(idx_range_from_, idx_range_to_);
1076 deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1077 bv->keep_range(idx_range_from_, idx_range_to_);
1078 deserial_.unset_range();
1079 }
1080 else
1081 {
1082 deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1083 }
1084 if (mask_bv)
1085 bv->bit_and(*mask_bv, bvector_type::opt_compress);
1086 }
1087 }
1088 return plains-1;
1089}
1090
1091// -------------------------------------------------------------------------
1092
/// Read the plain offset table from the header into off_vect_
///
/// @param dec - decoder positioned at the first offset
/// @param plains - number of 64-bit offsets to read
1093template<typename SV>
1095 bm::decoder& dec, unsigned plains)
1096{
1097 off_vect_.resize(plains);
1098 for (unsigned i = 0; i < plains; ++i)
1099 {
1100 size_t offset = (size_t) dec.get_64();
1101 off_vect_[i] = offset;
1102 } // for i
1103}
1104
1105// -------------------------------------------------------------------------
1106
/// Load the remap matrix that follows the bit-plains.
///
/// @param sv - target vector receiving the remap buffer
/// @param remap_buf_ptr - position of the remap sub-header; 0 means nothing
/// to load
///
/// Sub-header 'N' means no remap data; 'R' is followed by the 64-bit matrix
/// size, the matrix bytes and the end token 'E'. Any other sub-header, a size
/// mismatch against sv.remap_size() or a missing end token is reported as a
/// serialization format error.
1107template<typename SV>
1109 const unsigned char* remap_buf_ptr)
1110{
1111 if (!remap_buf_ptr)
1112 return;
1113
1114 bm::decoder dec_m(remap_buf_ptr);
1115 unsigned char rh = dec_m.get_8();
1116 switch (rh)
1117 {
1118 case 'N':
1119 break;
1120 case 'R':
1121 {
1122 size_t remap_size = (size_t) dec_m.get_64();
1123 unsigned char* remap_buf = sv.init_remap_buffer();
1124 BM_ASSERT(remap_buf);
1125 size_t target_remap_size = sv.remap_size();
 // the stored size must be non-zero and equal to the target's size
1126 if (!remap_size || !remap_buf || remap_size != target_remap_size)
1127 {
1128 #ifndef BM_NO_STL
1129 throw std::logic_error("Invalid serialization format (remap size)");
1130 #else
1131 BM_THROW(BM_ERR_SERIALFORMAT);
1132 #endif
1133 }
1134 dec_m.memcpy(remap_buf, remap_size);
1135 unsigned char end_tok = dec_m.get_8();
 // 'E' is the integrity token written after the matrix
1136 if (end_tok != 'E')
1137 {
1138 #ifndef BM_NO_STL
1139 throw std::logic_error("Invalid serialization format");
1140 #else
1141 BM_THROW(BM_ERR_SERIALFORMAT);
1142 #endif
1143 }
1144 sv.set_remap();
1145 }
1146 break;
1147 default:
1148 #ifndef BM_NO_STL
1149 throw std::logic_error("Invalid serialization format (remap error)");
1150 #else
1151 BM_THROW(BM_ERR_SERIALFORMAT);
1152 #endif
1153 } // switch
1154}
1155
1156// -------------------------------------------------------------------------
1157
/// Report an invalid signature header: throws std::logic_error, or uses
/// BM_THROW(BM_ERR_SERIALFORMAT) when BM_NO_STL is defined
1158template<typename SV>
1160{
1161#ifndef BM_NO_STL
1162 throw std::logic_error("Invalid serialization signature header");
1163#else
1164 BM_THROW(BM_ERR_SERIALFORMAT);
1165#endif
1166}
1167
1168// -------------------------------------------------------------------------
1169
/// Report a 64-bit BLOB given to an incompatible target: throws
/// std::logic_error, or uses BM_THROW(BM_ERR_SERIALFORMAT) when BM_NO_STL
/// is defined
1170template<typename SV>
1172{
1173#ifndef BM_NO_STL
1174 throw std::logic_error("Invalid serialization target (64-bit BLOB)");
1175#else
1176 BM_THROW(BM_ERR_SERIALFORMAT);
1177#endif
1178}
1179
1180// -------------------------------------------------------------------------
1181
/// Report a bit depth (plains count) mismatch with the target: throws
/// std::logic_error, or uses BM_THROW(BM_ERR_SERIALFORMAT) when BM_NO_STL
/// is defined
1182template<typename SV>
1184{
1185#ifndef BM_NO_STL
1186 throw std::logic_error("Invalid serialization target (bit depth)");
1187#else
1188 BM_THROW(BM_ERR_SERIALFORMAT);
1189#endif
1190}
1191
1192// -------------------------------------------------------------------------
1193
1194} // namespace bm
1195
1196#include "bmundef.h"
1197
1198#endif
Definitions(internal)
#define BM_ASSERT
Definition bmdef.h:130
Serialization / compression of bvector<>. Set theoretical operations on compressed BLOBs.
Sparse constainer sparse_vector<> for integer types using bit-transposition transform.
pre-processor un-defines to avoid global space pollution (internal)
List of reference bit-vectors with their true index associations.
Definition bmxor.h:241
@ opt_compress
compress blocks when possible (GAP/prefix sum)
Definition bm.h:134
allocator_type::allocator_pool_type allocator_pool_type
Definition bm.h:111
bm::id_t size_type
Definition bm.h:117
Alloc allocator_type
Definition bm.h:110
Deseriaizer for compressed collections.
int deserialize(CBC &buffer_coll, const unsigned char *buf, bm::word_t *temp_block=0)
CBC::address_resolver_type address_resolver_type
Seriaizer for compressed collections.
void serialize(const CBC &buffer_coll, buffer_type &buf, bm::word_t *temp_block=0)
CBC::address_resolver_type address_resolver_type
const unsigned char * get_pos() const BMNOEXCEPT
Return current buffer pointer.
Definition encoding.h:101
void seek(int delta) BMNOEXCEPT
change current position
Definition encoding.h:95
unsigned char get_8() BMNOEXCEPT
Reads character from the decoding buffer.
Definition encoding.h:89
void memcpy(unsigned char *dst, size_t count) BMNOEXCEPT
read bytes from the decode buffer
Definition encoding.h:618
Class for decoding data from memory buffer.
Definition encoding.h:118
bm::id64_t get_64() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
Definition encoding.h:703
Deserializer for bit-vector.
Definition bmserial.h:491
Memory encoding.
Definition encoding.h:50
size_t size() const BMNOEXCEPT
Returns size of the current encoding stream.
Definition encoding.h:485
unsigned char * get_pos() const BMNOEXCEPT
Get current memory stream position.
Definition encoding.h:493
void put_64(bm::id64_t w) BMNOEXCEPT
Puts 64 bits word into encoding buffer.
Definition encoding.h:562
void put_8(unsigned char c) BMNOEXCEPT
Puts one character into the encoding buffer.
Definition encoding.h:420
void set_pos(unsigned char *buf_pos) BMNOEXCEPT
Set current memory stream position.
Definition encoding.h:501
void memcpy(const unsigned char *src, size_t count) BMNOEXCEPT
copy bytes into target buffer or just rewind if src is NULL
Definition encoding.h:472
Deserializer, performs logical operations between bit-vector and serialized bit-vector.
Definition bmserial.h:825
Algorithms for rank compression of bit-vector.
Definition bmalgo.h:408
Bit-vector serialization class.
Definition bmserial.h:76
void gap_length_serialization(bool value) BMNOEXCEPT
Set GAP length serialization (serializes GAP levels of the original vector)
Definition bmserial.h:1126
byte_buffer< allocator_type > buffer
Definition bmserial.h:85
void set_bookmarks(bool enable, unsigned bm_interval=256) BMNOEXCEPT
Add skip-markers to serialization BLOB for faster range decode at the expense of some BLOB size increase.
Definition bmserial.h:1138
size_type serialize(const BV &bv, unsigned char *buf, size_t buf_size)
Bitvector serialization into memory block.
Definition bmserial.h:2264
sparse vector de-serializer
bm::operation_deserializer< bvector_type > op_deserial_
bm::serializer< bvector_type >::bv_ref_vector_type bv_ref_vector_type
void deserialize(SV &sv, const unsigned char *buf, const bvector_type &mask_bv)
bm::rank_compressor< bvector_type > rsc_compressor_
static void raise_invalid_bitdepth()
throw error on incorrect deserialization
static void raise_invalid_header()
throw error on incorrect deserialization
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
bvector_type::allocator_type alloc_type
unsigned load_header(bm::decoder &dec, SV &sv, unsigned char &matr_s_ser)
Deserialize header/version and other common info.
void deserialize(SV &sv, const unsigned char *buf, size_type from, size_type to)
bm::heap_vector< size_t, alloc_type, true > off_vect_
void deserialize_range(SV &sv, const unsigned char *buf, size_type from, size_type to)
void load_plains_off_table(bm::decoder &dec, unsigned plains)
load offset table
static void raise_invalid_64bit()
throw error on incorrect deserialization
const bvector_type * bvector_type_const_ptr
bm::deserializer< bvector_type, bm::decoder > deserial_
const unsigned char * remap_buf_ptr_
int load_null_plain(SV &sv, int plains, const unsigned char *buf, const bvector_type *mask_bv)
load NULL bit-plain (returns new plains count)
void deserialize_plains(SV &sv, unsigned plains, const unsigned char *buf, const bvector_type *mask_bv=0)
deserialize bit-vector plains
void deserialize_sv(SV &sv, const unsigned char *buf, const bvector_type *mask_bv)
static void load_remap(SV &sv, const unsigned char *remap_buf_ptr)
load string remap dict
void deserialize(SV &sv, const unsigned char *buf)
void set_bookmarks(bool enable, unsigned bm_interval=256)
Add skip-markers for faster range deserialization.
bm::serializer< bvector_type >::bv_ref_vector_type bv_ref_vector_type
void build_xor_ref_vector(const SV &sv)
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
const bvector_type * bvector_type_const_ptr
void set_xor_ref(bool is_enabled)
Turn ON and OFF XOR compression of sparse vectors.
void serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout)
Serialize sparse vector into a memory buffer(s) structure.
bm::serializer< bvector_type > bvs_
bool is_xor_ref() const
Get XOR reference compression status (enabled/disabled)
size_t deserialize(BV &bv, const unsigned char *buf, bm::word_t *temp_block=0, const bm::bv_ref_vector< BV > *ref_vect=0)
Bitvector deserialization from a memory BLOB.
Definition bmserial.h:2688
void sparse_vector_serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout, bm::word_t *temp_block=0)
Serialize sparse vector into a memory buffer(s) structure.
int sparse_vector_deserialize(SV &sv, const unsigned char *buf, bm::word_t *temp_block=0)
Deserialize sparse vector.
Definition bm.h:77
unsigned int word_t
Definition bmconst.h:38
ByteOrder
Byte orders recognized by the library.
Definition bmconst.h:429
unsigned long long int id64_t
Definition bmconst.h:34
ad-hoc conditional expressions
Definition bmutil.h:111
static ByteOrder byte_order()
Definition bmconst.h:464
layout class for serialization buffer structure
size_t size() const
return current serialized size
serializer< bvector_type >::buffer buffer_type
void resize(size_t ssize)
Set new serialized size.
unsigned char * reserve(size_t capacity)
resize capacity
const unsigned char * get_plain(unsigned i) const
Get plain pointer.
void set_plain(unsigned i, unsigned char *ptr, size_t buf_size)
Set plain output pointer and size.
size_t plane_size_[SV::sv_plains]
serialized plain size
unsigned char * plain_ptrs_[SV::sv_plains]
pointers on serialized bit-plains
buffer_type buf_
serialization buffer
const unsigned char * buf() const
Return serialization buffer pointer.
size_t capacity() const
return serialization buffer capacity
const unsigned char * data() const
Return serialization buffer pointer.