diff --git a/Makefile b/Makefile
index c47428e..f2597c3 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,8 @@
-.PHONY: all venv clean
+.PHONY: all venv clean images
 .SUFFIXES: .bs .html
 
+IMAGES := $(wildcard images/*.svg)
+
 all: build/index.html
 
 clean:
@@ -21,5 +23,12 @@ $(bikeshed): $(venv-marker) Makefile
 build:
 	mkdir -p $@
 
-build/index.html: api.bs build $(bikeshed)
+build/index.html: api.bs $(IMAGES) build $(bikeshed)
 	$(bikeshed) --die-on=warning spec $< $@
+
+images:
+	@echo "Regenerating images"
+	for i in $(IMAGES); do \
+	  tmp="$$(mktemp)"; \
+	  npx aasvg --extract --embed <"$$i" >"$$tmp" && mv "$$tmp" "$$i"; \
+	done
diff --git a/api.bs b/api.bs
index d2af779..d2145ce 100644
--- a/api.bs
+++ b/api.bs
@@ -281,7 +281,6 @@ Open questions:
   option of either? => via conversion site
 * Epochs
 
-TODO
 
 ## ListAggregationSystems API ## {#list-aggregation-systems-api}
 
@@ -445,10 +444,185 @@ TODO
 
 TODO
 
+## Anti-Replay Requirements ## {#anti-replay}
+
+Conversion reports generated by browsers are bound
+to the amount of [=privacy budget=]
+that was expended by the site that requested the report.
+
+TODO
+
 # Differential Privacy # {#dp}
 
-This design uses the concept of differential privacy as the basis of its privacy design.
+This design uses the concept of [=differential privacy=]
+as the basis of its privacy design. [[PPA-DP]]
+
+Differential privacy is a mathematical definition of privacy
+that can bound the amount of private information
+that is revealed by a system. [[DP]]
+Differential privacy is not the only means
+by which privacy is protected in this system,
+but it is the most rigorously defined and analyzed.
+As such, it provides the strongest privacy guarantees.
+
+Differential privacy uses randomized noise
+to hide private data contributions
+to an aggregated dataset.
+The noise hides individual contributions to the dataset
+while retaining the usefulness of aggregated analysis.
+
+To apply differential privacy,
+it is necessary to define what information is protected.
+In this system, the protected information is
+the [=impressions=] of a single user profile,
+on a single user agent,
+over a single week,
+for a single website that registers [=conversions=].
+[[#dp-unit]] describes the implications of this design
+in more detail.
+
+This attribution design uses a form of differential privacy
+called <dfn>individual differential privacy</dfn>.
+In this model, each user agent is separately responsible
+for limiting the information that it contributes.
+
+The [=individual differential privacy=] design of this API
+has three primary components:
+
+1.  User agents limit the number of times
+    that they use [=impressions=] in [=conversion reports=].
+    [[#dp-budget]] explores this in greater depth.
+
+2.  [=Aggregation services=] ensure that any given [=conversion report=] is
+    only used in accordance with the [=privacy budget=].
+    [[#anti-replay]] describes requirements on aggregation services
+    in more detail.
+
+3.  Noise is added by [=aggregation services=].
+    [[#dp-mechanism]] details the mechanisms that might be used.
+
+Together, these measures place limits
+on the information that is released for each [=privacy unit=].
+
+
+## Privacy Unit ## {#dp-unit}
+
+An implementation of differential privacy
+requires a clear definition of what is protected.
+This is known as the <dfn>privacy unit</dfn>,
+which represents the entity that receives privacy protection.
+
+This system adopts a [=privacy unit=]
+that is the combination of three values:
+
+1.  A user agent profile.
+    That is, an instance of a user agent,
+    as used by a single person.
+
+2.  The [=site=] that requests information about impressions.
+
+    The sites that register impressions
+    are not considered.
+    Those sites do not receive information from this system directly.
+
+3.  The current week.
+
+A change to any of these values produces a new privacy unit,
+which results in a separate [=privacy budget=].
+Each site that a person visits receives a bounded amount of information
+for each week.
+
+Ideally, the [=privacy unit=] is a single person.
+Though ideal, it is not possible to develop a useful system
+that guarantees perfect correspondence with a person,
+for a number of reasons:
+
+*   People use multiple browsers and multiple devices,
+    often without coordination.
+
+*   A unit that covered all websites
+    could be exhausted by one site,
+    denying other sites any information.
+
+*   Advertising is an ongoing activity.
+    Without renewing the [=privacy budget=] periodically,
+    sites could exhaust their budget forever.
+
+
+### Browser Instances ### {#dp-instance}
+
+Each browser instance manages a separate [=privacy budget=].
+
+Coordination between browser instances might be possible,
+but it is not expected.
+That coordination might allow privacy to be improved
+by reducing the total amount of information that is released.
+It might also improve the utility of attribution
+by allowing impressions on one browser instance
+to be converted on another.
+
+Coordination across different implementations
+is presently out of scope for this work.
+Implementations can perform some coordination
+between instances that are known to belong to the same person,
+but this is not mandatory.
+
+
+### Per-Site Limits ### {#dp-site}
+
+Information is released to websites on the basis of [=site=].
+This aligns with the boundary used in other privacy-relevant functions.
+
+A finer privacy unit, such as an [=origin=],
+would make it trivial to obtain additional information.
+Information about the same person could be gathered
+from multiple origins.
+That information could then be combined
+by exploiting the free flow of information within the site,
+using cookies [[COOKIES]] or similar.
+
+[[#dp-safety]] discusses attacks that exploit this limit
+and some additional [=safety limits=] that might be implemented
+by user agents
+to protect against those attacks.
+
+
+### Refresh Interval ### {#dp-refresh}
+
+The differential privacy budget available to a site
+is refreshed at an interval of one week.
+
+This budget applies to the [=impressions=]
+that are registered with the user agent
+and later queried,
+not conversions.
+
+From the perspective of the analysis [[PPA-DP]],
+each week of impressions forms a separate database.
+A finite number of queries can be made of each database,
+as determined by the [=privacy budget=]
+associated with that database.
+
+The goal is to set an interval that is as long as feasible.
+A longer interval allows for a better privacy/utility balance
+because sites can be allocated a larger overall budget
+at any point in time,
+while keeping the overall rate of privacy loss low.
+However, a longer interval also makes it easier to
+exhaust a privacy budget completely,
+yielding no information until the next refresh.
+
+The choice of a week is largely arbitrary.
+One week is expected to be enough to allow sites
+to decide how to spend [=privacy budgets=]
+without careful planning that needs to account for
+changes that might occur days or weeks in the future.
+
+[[#dp-budget]] describes the process for budgeting in more detail.
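+
+<div class=example>
+This non-normative sketch shows one way a user agent could key
+its per-[=privacy unit=] state by the [=site=] requesting reports
+and the week in which [=impressions=] occurred.
+The names `WEEK_MS` and `budgetKey` are illustrative only
+and are not defined by this specification.
+
+```js
+// Illustrative sketch, not normative: together with the user agent
+// profile, the (site, week) pair identifies one privacy unit, and
+// each unit gets its own budget.
+const WEEK_MS = 7 * 24 * 60 * 60 * 1000;
+
+function budgetKey(conversionSite, impressionTimeMs) {
+  // Weeks are numbered from the epoch; any stable numbering works.
+  const week = Math.floor(impressionTimeMs / WEEK_MS);
+  return `${conversionSite}|week-${week}`;
+}
+```
+
+A change to either component of the key,
+or a new user agent profile,
+yields a fresh [=privacy budget=].
+</div>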
+
 
 ## Privacy Budgets ## {#dp-budget}
 
@@ -456,9 +630,92 @@ This design uses the concept of differential privacy as the basis of its privacy
 Browsers maintain a privacy budget,
 which is a means of limiting the amount of privacy loss.
 
+This specification uses an individual form
+of (ε, δ)-differential privacy as its basis.
+In this model, privacy loss is measured using the value ε.
+The δ value is handled by the [=aggregation service=]
+when adding noise to aggregates.
+
+Each user agent instance is responsible for
+managing privacy budgets.
+
+Each [=conversion report=] that is requested specifies an ε value
+that represents the amount of privacy budget
+that the report consumes.
+
+When searching for impressions for the conversion report,
+the user agent deducts the specified ε value from
+the budget for the week in which those impressions fall.
+If the privacy budget for that week is not sufficient,
+the impressions from that week are not used.
+
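+<div class=example>
+This non-normative sketch illustrates the deduction rule described above.
+All names, and the `weeklyLimit` parameter in particular,
+are illustrative only and are not defined by this specification.
+
+```js
+// Illustrative sketch, not normative. impressionsByWeek is a Map
+// from week number to that week's matching impressions; spent is a
+// Map from week number to the ε already consumed; weeklyLimit is
+// the per-unit budget (its value is not specified here).
+function selectImpressions(impressionsByWeek, epsilon, spent, weeklyLimit) {
+  const usable = [];
+  for (const [week, impressions] of impressionsByWeek) {
+    const used = spent.get(week) ?? 0;
+    if (used + epsilon <= weeklyLimit) {
+      spent.set(week, used + epsilon); // deduct ε from this week's budget
+      usable.push(...impressions);
+    }
+    // Weeks whose budget cannot cover ε contribute no impressions.
+  }
+  return usable;
+}
+```
+</div>
+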
+<pre class=link-defaults>
+spec:html; type:dfn; text:site
+</pre>
+
 <pre class=biblio>
 {
+  "dp": {
+    "authors": [
+      "Cynthia Dwork",
+      "Aaron Roth"
+    ],
+    "date": "2014",
+    "href": "https://doi.org/10.1561/0400000042",
+    "title": "The Algorithmic Foundations of Differential Privacy",
+    "publisher": "now, Foundations and Trends in Theoretical Computer Science, Vol. 9, Nos. 3–4"
+  },
   "ppa-dp": {
     "authors": [
       "Pierre Tholoniat",
diff --git a/images/budget.svg b/images/budget.svg
new file mode 100644
index 0000000..7f71a10
--- /dev/null
+++ b/images/budget.svg
@@ -0,0 +1,73 @@
+