From b9e7fc7fa9c70df36881b2166e8bd2bcfa87fe2e Mon Sep 17 00:00:00 2001 From: Tianjie Xu Date: Wed, 26 Jul 2017 16:41:24 -0700 Subject: Add implemention of SortedRangeSet This is useful in imgdiff to maintain the block ranges of splitted source image. Bug: 34220646 Test: mma && unit tests pass Change-Id: I6427f2ea50f0e3b0aa3dd01880ec0206679b7429 --- tests/unit/rangeset_test.cpp | 47 +++++++++++++++ updater/include/updater/rangeset.h | 116 ++++++++++++++++++++++++++++++++++++- 2 files changed, 162 insertions(+), 1 deletion(-) diff --git a/tests/unit/rangeset_test.cpp b/tests/unit/rangeset_test.cpp index 3c6d77ef5..3993cb9ea 100644 --- a/tests/unit/rangeset_test.cpp +++ b/tests/unit/rangeset_test.cpp @@ -110,3 +110,50 @@ TEST(RangeSetTest, iterators) { } ASSERT_EQ((std::vector{ Range{ 8, 10 }, Range{ 1, 5 } }), ranges); } + +TEST(RangeSetTest, tostring) { + ASSERT_EQ("2,1,6", RangeSet::Parse("2,1,6").ToString()); + ASSERT_EQ("4,1,5,8,10", RangeSet::Parse("4,1,5,8,10").ToString()); + ASSERT_EQ("6,1,3,4,6,15,22", RangeSet::Parse("6,1,3,4,6,15,22").ToString()); +} + +TEST(SortedRangeSetTest, insertion) { + SortedRangeSet rs({ { 2, 3 }, { 4, 6 }, { 8, 14 } }); + rs.Insert({ 1, 2 }); + ASSERT_EQ(SortedRangeSet({ { 1, 3 }, { 4, 6 }, { 8, 14 } }), rs); + ASSERT_EQ(static_cast(10), rs.blocks()); + rs.Insert({ 3, 5 }); + ASSERT_EQ(SortedRangeSet({ { 1, 6 }, { 8, 14 } }), rs); + ASSERT_EQ(static_cast(11), rs.blocks()); + + SortedRangeSet r1({ { 20, 22 }, { 15, 18 } }); + rs.Insert(r1); + ASSERT_EQ(SortedRangeSet({ { 1, 6 }, { 8, 14 }, { 15, 18 }, { 20, 22 } }), rs); + ASSERT_EQ(static_cast(16), rs.blocks()); + + SortedRangeSet r2({ { 2, 7 }, { 15, 21 }, { 20, 25 } }); + rs.Insert(r2); + ASSERT_EQ(SortedRangeSet({ { 1, 7 }, { 8, 14 }, { 15, 25 } }), rs); + ASSERT_EQ(static_cast(22), rs.blocks()); +} + +TEST(SortedRangeSetTest, file_range) { + SortedRangeSet rs; + rs.Insert(4096, 4096); + ASSERT_EQ(SortedRangeSet({ { 1, 2 } }), rs); + // insert block 2-9 + rs.Insert(4096 * 3 - 1, 4096 * 7); + ASSERT_EQ(SortedRangeSet({ { 1, 10 } }), rs); + // insert block 15-19 + rs.Insert(4096 * 15 + 1, 4096 * 4); + ASSERT_EQ(SortedRangeSet({ { 1, 10 }, { 15, 20 } }), rs); + + // rs overlaps block 2-2 + ASSERT_TRUE(rs.Overlaps(4096 * 2 - 1, 10)); + ASSERT_FALSE(rs.Overlaps(4096 * 10, 4096 * 5)); + + ASSERT_EQ(static_cast(10), rs.GetOffsetInRangeSet(4106)); + ASSERT_EQ(static_cast(40970), rs.GetOffsetInRangeSet(4096 * 16 + 10)); + // block#10 not in range. + ASSERT_EXIT(rs.GetOffsetInRangeSet(40970), ::testing::KilledBySignal(SIGABRT), ""); +} \ No newline at end of file diff --git a/updater/include/updater/rangeset.h b/updater/include/updater/rangeset.h index fad038043..b67c98724 100644 --- a/updater/include/updater/rangeset.h +++ b/updater/include/updater/rangeset.h @@ -24,6 +24,7 @@ #include #include +#include #include using Range = std::pair; @@ -74,6 +75,18 @@ class RangeSet { return RangeSet(std::move(pairs)); } + std::string ToString() const { + if (ranges_.empty()) { + return ""; + } + std::string result = std::to_string(ranges_.size() * 2); + for (const auto& r : ranges_) { + result += android::base::StringPrintf(",%zu,%zu", r.first, r.second); + } + + return result; + } + // Get the block number for the i-th (starting from 0) block in the RangeSet. size_t GetBlockNumber(size_t idx) const { CHECK_LT(idx, blocks_) << "Out of bound index " << idx << " (total blocks: " << blocks_ << ")"; @@ -157,8 +170,109 @@ class RangeSet { return ranges_ != other.ranges_; } - private: + protected: // Actual limit for each value and the total number are both INT_MAX. std::vector ranges_; size_t blocks_; }; + +static constexpr size_t kBlockSize = 4096; + +// The class is a sorted version of a RangeSet; and it's useful in imgdiff to split the input +// files when we're handling large zip files. Specifically, we can treat the input file as a +// continuous RangeSet (i.e. RangeSet("0-99") for a 100 blocks file); and break it down into +// several smaller chunks based on the zip entries. + +// For example, [source: 0-99] can be split into +// [split_src1: 10-29]; [split_src2: 40-49, 60-69]; [split_src3: 70-89] +// Here "10-29" simply means block 10th to block 29th with respect to the original input file. +// Also, note that the split sources should be mutual exclusive, but they don't need to cover +// every block in the original source. +class SortedRangeSet : public RangeSet { + public: + SortedRangeSet() {} + + // Ranges in the the set should be mutually exclusive; and they're sorted by the start block. + explicit SortedRangeSet(std::vector&& pairs) : RangeSet(std::move(pairs)) { + std::sort(ranges_.begin(), ranges_.end()); + } + + void Insert(const Range& to_insert) { + SortedRangeSet rs({ to_insert }); + Insert(rs); + } + + // Insert the input SortedRangeSet; keep the ranges sorted and merge the overlap ranges. + void Insert(const SortedRangeSet& rs) { + if (rs.size() == 0) { + return; + } + // Merge and sort the two RangeSets. + std::vector temp = std::move(ranges_); + std::copy(rs.begin(), rs.end(), std::back_inserter(temp)); + std::sort(temp.begin(), temp.end()); + + Clear(); + // Trim overlaps and insert the result back to ranges_. + Range to_insert = temp.front(); + for (auto it = temp.cbegin() + 1; it != temp.cend(); it++) { + if (it->first <= to_insert.second) { + to_insert.second = std::max(to_insert.second, it->second); + } else { + ranges_.push_back(to_insert); + blocks_ += (to_insert.second - to_insert.first); + to_insert = *it; + } + } + ranges_.push_back(to_insert); + blocks_ += (to_insert.second - to_insert.first); + } + + void Clear() { + blocks_ = 0; + ranges_.clear(); + } + + using RangeSet::Overlaps; + bool Overlaps(size_t start, size_t len) const { + RangeSet rs({ { start / kBlockSize, (start + len - 1) / kBlockSize + 1 } }); + return Overlaps(rs); + } + + // Compute the block range the file occupies, and insert that range. + void Insert(size_t start, size_t len) { + Range to_insert{ start / kBlockSize, (start + len - 1) / kBlockSize + 1 }; + Insert(to_insert); + } + + // Given an offset of the file, checks if the corresponding block (by considering the file as + // 0-based continuous block ranges) is covered by the SortedRangeSet. If so, returns the offset + // within this SortedRangeSet. + // + // For example, the 4106-th byte of a file is from block 1, assuming a block size of 4096-byte. + // The mapped offset within a SortedRangeSet("1-9 15-19") is 10. + // + // An offset of 65546 falls into the 16-th block in a file. Block 16 is contained as the 10-th + // item in SortedRangeSet("1-9 15-19"). So its data can be found at offset 40970 (i.e. 4096 * 10 + // + 10) in a range represented by this SortedRangeSet. + size_t GetOffsetInRangeSet(size_t old_offset) const { + size_t old_block_start = old_offset / kBlockSize; + size_t new_block_start = 0; + for (const auto& range : ranges_) { + // Find the index of old_block_start. + if (old_block_start >= range.second) { + new_block_start += (range.second - range.first); + } else if (old_block_start >= range.first) { + new_block_start += (old_block_start - range.first); + return (new_block_start * kBlockSize + old_offset % kBlockSize); + } else { + CHECK(false) <<"block_start " << old_block_start << " is missing between two ranges: " + << this->ToString(); + return 0; + } + } + CHECK(false) <<"block_start " << old_block_start << " exceeds the limit of current RangeSet: " + << this->ToString(); + return 0; + } +}; \ No newline at end of file -- cgit v1.2.3