blob: a0f71fe74ec97fe483595a940b5d535112bb6ee0 [file] [log] [blame]
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "third_party/blink/public/common/privacy_budget/identifiability_metrics.h"
#include <cstdint>
#include "base/containers/span.h"
#include "base/hash/legacy_hash.h"
namespace blink {
uint64_t IdentifiabilityDigestOfBytes(base::span<const uint8_t> in) {
// The chosen hash function satisfies the following requirements:
//
// * Fast. These hashes will need to be calculated during performance
// critical code.
// * Suitable for fingerprinting. I.e. broad domain, good diffusion, low
// collision rate.
// * Resistant to hash flooding.
// * Able to use the entire 64-bit space we have at our disposal.
// * Either support iterative operation or be usable as a primitive for
// constructing one.
// * Remains stable for the duration of the identifiability study O(months).
// This one is trivial. It just means that the hash is not in danger of
// imminent change.
// * Implemented, well tested, and usable by //content, //chrome, as well
// as //blink/common.
//
// It is not a requirement for the digest to be a cryptographic hash. I.e. not
// necessary to deter second-preimage construction.
//
// base::PersistentHash(): (Rejected)
// - Based on SuperFastHash() which doesn't meet the fingerprinting
// requirement due to a high collision rate.
// - Digest is 32-bits.
// - No stateful implementation in //base. Blink's StringHasher is
// interestingly a stateful implementation of SuperFastHash but is not
// available in //blink/public/common.
//
// base::legacy::CityHash64{WithSeed}(): (Selected)
// - Based on Google's CityHash 1.0.3. Some known weaknesses, but still
// good enough.
// - No ready-to-use chaining implementation.
// + Digest is 64-bits.
// + Seeded variant is a useful primitive for a chained hash function.
// Would be better if it took two seeds, but one is also usable.
//
// Other hash functions were considered, but were rejected due to one or more
// of the following reasons:
// - An implementation was not available.
// - The version available has significant known weaknesses.
//
// One in particular that would have been nice to have is FarmHash.
//
// CityHash is quite efficient for small buffers. Operation counts are
// roughly as follows. For small buffers, fetches dominate.:
//
// Length │ Fetches │ Muls │ Shifts │
// ───────┼──────────┼─────────┼─────────┤
// 1..16 │ 3 │ 3 │ 4 │
// ───────┼──────────┼─────────┼─────────┤
// 17..32 │ 4 │ 3 │ 8 │
// ───────┼──────────┼─────────┼─────────┤
// 33..64 │ 10 │ 4 │ 18 │
// ───────┴──────────┴─────────┴─────────┘
return base::legacy::CityHash64(in);
}
} // namespace blink