| // Copyright 2020 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef THIRD_PARTY_BLINK_PUBLIC_COMMON_PRIVACY_BUDGET_IDENTIFIABLE_TOKEN_BUILDER_H_ |
| #define THIRD_PARTY_BLINK_PUBLIC_COMMON_PRIVACY_BUDGET_IDENTIFIABLE_TOKEN_BUILDER_H_ |
| |
| #include <array> |
| |
| #include "base/containers/span.h" |
| #include "base/strings/string_piece.h" |
| #include "base/sys_byteorder.h" |
| #include "third_party/blink/public/common/common_export.h" |
| #include "third_party/blink/public/common/privacy_budget/identifiability_internal_templates.h" |
| #include "third_party/blink/public/common/privacy_budget/identifiable_token.h" |
| |
| namespace blink { |
| |
| // Builds an IdentifiableToken incrementally. |
| // |
| // Use this when the input to a sample is a bunch of disjoint objects, or the |
| // sample needs to include objects that are incrementally encountered. |
| // |
| // Notes: |
| // * The digest returned by this class is *NOT* the same as the one returned |
| // by IdentifiabilityDigestOfBytes() for the same set of bytes. This is due |
| // to the block-based chaining of digests used by this class. |
| // IdentifiabilityDigestOfBytes and this class are *NOT* interchangeable. |
| // |
| // TODO(asanka): IdentifiabilityDigestOfBytes() and this class should |
| // interop better. Perhaps by making the latter use the former. |
| // |
| // * The digest returned by this class is *NOT* the same as what you would |
| // acquire by invoking IdentifiableToken() over the same object. |
| // IdentifiableToken() and this class are *NOT* interchangeable. |
| // |
| // * The digest returned by this class only depends on the cumulative sequence |
| // of bytes that are fed to it. The partitioning thereof is irrelevant. |
| // |
| // * This object never finalizes. Partial digests can be extracted at any |
| // point. |
| class BLINK_COMMON_EXPORT IdentifiableTokenBuilder { |
|  public: |
|   // Convenient alias for a span of const uint8_t. |
|   using ByteSpan = IdentifiableToken::ByteSpan; |
| |
|   // Initializes an "empty" incremental digest for the purpose of constructing |
|   // an identifiability sample. |
|   IdentifiableTokenBuilder(); |
| |
|   // Initializes an incremental digest and populates it with the data contained |
|   // in |message|. |
|   explicit IdentifiableTokenBuilder(ByteSpan message); |
| |
|   // Copies the intermediate state. |
|   IdentifiableTokenBuilder(const IdentifiableTokenBuilder&); |
| |
|   // Replaces the intermediate state of this builder with a copy of |other|'s. |
|   // |
|   // This is spelled out because the implicitly generated operator would copy |
|   // |position_| verbatim, which would leave this builder's cursor pointing |
|   // into |other|'s buffer rather than into its own |partial_|. |
|   IdentifiableTokenBuilder& operator=(const IdentifiableTokenBuilder& other) { |
|     // Measure the fill level before |partial_| is overwritten so that |
|     // self-assignment leaves the builder unchanged. |
|     const auto filled = other.position_ - other.partial_.begin(); |
|     partial_ = other.partial_; |
|     // Re-base the cursor onto this object's own buffer. |
|     position_ = partial_.begin() + filled; |
|     chaining_value_ = other.chaining_value_; |
|     return *this; |
|   } |
| |
|   // Feeds data contained in |buffer| to the digest. |
|   IdentifiableTokenBuilder& AddBytes(ByteSpan buffer); |
| |
|   // Feeds data contained in |buffer| to the digest, but precedes the buffer |
|   // contents with an integer indicating the length. Use this when: |
|   // |
|   //   * |buffer| is atomic. I.e. it will always be added as a single buffer. |
|   // |
|   //   * The boundary between |buffer| and adjacent objects cannot be uniquely |
|   //     established based on content. |
|   // |
|   // E.g.: Ignoring NUL terminators, the pair of strings "abcd", "efgh" will be |
|   //       assigned the same token as the strings "abcdefg", "h" if both are |
|   //       added individually via AddBytes(). But they will have distinct |
|   //       digests if added via AddAtomic(). |
|   // |
|   // If the contents of the object cannot be specified in a contiguous span of |
|   // memory, then consider adding a length directly via AddValue() prior to |
|   // adding the contents of the buffer. Doing so will achieve the same ends as |
|   // AddAtomic(). |
|   IdentifiableTokenBuilder& AddAtomic(ByteSpan buffer); |
| |
|   // Convenience overload: feeds the bytes viewed by |string| to the ByteSpan |
|   // overload of AddAtomic(). |
|   IdentifiableTokenBuilder& AddAtomic(base::StringPiece string) { |
|     return AddAtomic(base::as_bytes(base::make_span(string))); |
|   } |
| |
|   // Feeds the underlying value of the |token| itself to the digest. Use this |
|   // when |token| is computed in parallel in order to preserve the ordering of |
|   // values that were seen in a concurrent sequence that cannot be |
|   // deterministically interleaved into the primary stream. |
|   IdentifiableTokenBuilder& AddToken(IdentifiableToken token); |
| |
|   // Helper for feeding primitive types by value efficiently. Anything more |
|   // complicated than that should be passed in as a base::span<const uint8_t>. |
|   // |
|   // Adds eight bytes to the digest. If the type of the value doesn't consume |
|   // all of the bytes, pads the remainder with NUL bytes. |
|   // |
|   // Only participates in overload resolution when T is not cv- or |
|   // reference-qualified, has unique object representations, and is no larger |
|   // than a uint64_t. |
|   template <typename T, |
|             typename std::enable_if_t< |
|                 std::is_same<T, internal::remove_cvref_t<T>>::value && |
|                 internal::has_unique_object_representations<T>::value && |
|                 sizeof(T) <= sizeof(uint64_t)>* = nullptr> |
|   IdentifiableTokenBuilder& AddValue(T in) { |
|     AlignPartialBuffer(); |
|     // Convert to little-endian byte order before exposing the raw bytes. The |
|     // result stays unsigned, matching what ByteSwapToLE64() hands back, so no |
|     // unsigned-to-signed round trip is involved. |
|     const uint64_t clean_buffer = |
|         base::ByteSwapToLE64(internal::DigestOfObjectRepresentation(in)); |
|     return AddBytes(base::make_span( |
|         reinterpret_cast<const uint8_t*>(&clean_buffer), sizeof(clean_buffer))); |
|   } |
| |
|   // Conversion operator captures an intermediate digest. |
|   // |
|   // The sample captures all the data that's been fed into the digest so far, |
|   // but doesn't finalize the digest. It is valid to continue adding data after |
|   // constructing an intermediate sample. |
|   // |
|   // (google-explicit-constructor also flags user-defined conversion operators.) |
|   // NOLINTNEXTLINE(google-explicit-constructor) |
|   operator IdentifiableToken() const; |
| |
|   // Captures an intermediate digest. |
|   // |
|   // The sample captures all the data that's been fed into the digest so far, |
|   // but doesn't finalize the digest. It is valid to continue adding data after |
|   // constructing an intermediate sample. |
|   IdentifiableToken GetToken() const; |
| |
|   // No comparisons. |
|   bool operator==(const IdentifiableTokenBuilder&) const = delete; |
|   bool operator<(const IdentifiableTokenBuilder&) const = delete; |
| |
|  private: |
|   // Block size. Must be a multiple of 64. Higher block sizes consume more |
|   // memory. The extra cost is unlikely to be worth it. |
|   // |
|   // Under the covers we use CityHash64. It can pretty efficiently digest |
|   // 64-byte blocks. |
|   static constexpr size_t kBlockSizeInBytes = 64; |
| |
|   // Target alignment for new buffers. This is set to 8 for all platforms and |
|   // must always stay constant across platforms. |
|   static constexpr size_t kBlockAlignment = 8; |
| |
|   // An array of exactly |kBlockSizeInBytes| bytes. |
|   using BlockBuffer = std::array<uint8_t, kBlockSizeInBytes>; |
| |
|   // A view of a full block. |
|   using ConstFullBlockSpan = base::span<const uint8_t, kBlockSizeInBytes>; |
| |
|   // Returns true if the partial buffer is aligned on |kBlockAlignment| |
|   // boundary. |
|   bool IsAligned() const; |
| |
|   // Appends enough NUL bytes to |partial_| until the next insertion point is |
|   // aligned on a |kBlockAlignment| boundary. |
|   // |
|   // If the partial buffer is non-empty, its size is unlikely to be aligned at |
|   // machine word boundary. This makes subsequent append operations slow for |
|   // data types that are already aligned. |
|   // |
|   // This should only be called prior to adding an atomic buffer. |
|   void AlignPartialBuffer(); |
| |
|   // Captures the |kBlockSizeInBytes| bytes of data in |block| into the digest. |
|   // |block| must be exactly this many bytes. |
|   void DigestBlock(ConstFullBlockSpan block); |
| |
|   // Captures as many bytes as possible from |message| into the partial block in |
|   // |partial_|. It captures a maximum of |kBlockSizeInBytes - 1| bytes. |
|   // |
|   // Returns a span covering the remainder of |message| that was not consumed. |
|   ByteSpan SkimIntoPartial(ByteSpan message); |
| |
|   // Returns a span for the contents of the partial block. |
|   // |
|   // Can be called at any point. Does not change the state of the partial |
|   // buffer. |
|   ByteSpan GetPartialBlock() const; |
| |
|   // Returns a span that includes the contents of the partial block and backed |
|   // by |partial_|. |
|   // |
|   // NOTE: Should only be called once |kBlockSizeInBytes| bytes have been |
|   //       accumulated. Resets |partial_size_| upon completion. |
|   // |
|   // NOTE: Any subsequent AddBytes(), AddValue(), AddAtomic() calls will |
|   //       invalidate the returned FullBlock. |
|   ConstFullBlockSpan TakeCompletedBlock(); |
| |
|   // Size of partially filled buffer. |
|   size_t PartialSize() const; |
| |
|   // Accumulates smaller pieces of data until we have a full block. |
|   alignas(int64_t) BlockBuffer partial_; |
| |
|   // Next available position in `partial_`. std::array iterators are never |
|   // invalidated. |
|   // |
|   // This always refers to this object's own `partial_`, so copying a builder |
|   // must re-base it instead of copying the iterator value. |
|   BlockBuffer::iterator position_ = partial_.begin(); |
| |
|   // Merkle-Damgård chaining. |
|   uint64_t chaining_value_; |
| }; |
| |
| } // namespace blink |
| |
| #endif // THIRD_PARTY_BLINK_PUBLIC_COMMON_PRIVACY_BUDGET_IDENTIFIABLE_TOKEN_BUILDER_H_ |