% FRI.tex

%\documentclass[10pt,a4paper,oneside]{article}
% !TEX TS-program = pdflatex
% !TEX encoding = UTF-8 Unicode

%\documentclass[11pt,article,oneside]{memoir} 
%\usepackage[utf8]{inputenc}
%\usepackage[T1]{fontenc}
%\usepackage{lmodern}

\documentclass[11pt]{article}

%\usepackage[utf8]{inputenc}
%\usepackage[T1]{fontenc}
\usepackage{lmodern}

\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{amsmath}
\usepackage[total={7in,9in}]{geometry}
\usepackage{graphicx}
\usepackage{lmodern}
\usepackage[bookmarks, colorlinks=false, pdftitle={A summary on the FRI low degree test}, pdfauthor={author}]{hyperref}
\usepackage[colorinlistoftodos]{todonotes}
\usepackage{tikz}
\usepackage{titlesec}
\usepackage{float}
\usetikzlibrary{shapes, fit}
\setcounter{tocdepth}{4}
\setcounter{secnumdepth}{4}
\setlength{\marginparwidth}{3cm}


\usepackage{url}
\usepackage{amsthm}
\usepackage{mathrsfs}
\usepackage{nicefrac}

\usepackage[n,advantage,operators,sets,adversary,landau,probability,notions,logic,ff,mm,primitives,events, complexity,asymptotics,keys]{cryptocode}

\usepackage{listings}
\usepackage{footnote}

\definecolor{dkgreen}{rgb}{0,0.6,0}
\definecolor{gray}{rgb}{0.5,0.5,0.5}
\definecolor{mauve}{rgb}{0.58,0,0.82}

\lstset{%frame=tb,https://www.overleaf.com/project/608bc77c801b16bbadb2210a
  language=sh,
  aboveskip=3mm,
  belowskip=3mm,
  showstringspaces=false,
  columns=flexible,
  basicstyle={\small\ttfamily},
  numbers=none,
  numberstyle=\tiny\color{gray},
  keywordstyle=\color{blue},
  commentstyle=\color{dkgreen},
  stringstyle=\color{mauve},
  breaklines=true,
  breakatwhitespace=true,
  tabsize=3
}

\RequirePackage{etex}

% Theorem environments %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\newtheorem{thm}{Theorem}[]
\newtheorem*{thm*}{Theorem}
\newtheorem{cor}{Corollary}[]
\newtheorem{lem}[]{Lemma}
\newtheorem{prop}[]{Proposition}
\newtheorem{conj}[]{Conjecture}
\newtheorem{protocol}[]{Protocol}

\theoremstyle{definition}
\newtheorem{defn}[thm]{Definition}
\newtheorem*{defn*}{Definition}

\theoremstyle{remark}
\newtheorem{rem}[thm]{Remark}
\newtheorem{rems}[thm]{Remarks}
\newtheorem*{rem*}{Remark}

% MATH %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\newcommand{\Q}{\mathbb{Q}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\C}{\mathbb{C}}
\newcommand{\Z}{\mathbb{Z}}
\DeclareMathOperator{\N}{\mathbb{N}}
\renewcommand{\PP}{\mathbf{P}}
\newcommand{\OO}{\mathcal{O}}


\DeclareMathOperator{\param}{\mathsf{Par}}
\DeclareMathOperator{\gen}{\mathsf{Gen}}
\DeclareMathOperator{\setup}{\mathsf{Setup}}
\DeclareMathOperator{\indexer}{\mathsf{Index}}
\DeclareMathOperator{\comm}{\mathsf{Com}}
\DeclareMathOperator{\open}{\mathsf{Open}}
\DeclareMathOperator{\prove}{\mathsf{Prove}}
\DeclareMathOperator{\extract}{\mathsf{Extract}}
\DeclareMathOperator{\simulate}{\mathsf{Sim}}
\DeclareMathOperator{\RS}{\mathsf{RS}}
\DeclareMathOperator{\FFT}{\mathsf{FFT}}
\DeclareMathOperator{\Quotient}{\mathsf{Quotient}}
\DeclareMathOperator{\agree}{\mathsf{agree}}

\renewcommand{\adv}{\mathsf{Adv}}


\author{%
Ulrich Hab{\"o}ck\thanks{This work is supported by Horizenlabs, Inc.,  and Orbis Labs.}
\\\\
Orbis Labs
\\
\texttt{team@orbislabs.com}
}

\begin{document}
%\frontmatter
\title{%
A summary on the FRI low degree test
%\\
%{\small Version 1.2}
%
}
\date{%
\today\footnote{%
This updated version of the summary corrects a typo in the soundness error formula of Theorem \ref{thm:BatchedFRISoundness}.
Furthermore, the concrete security parameters are now changed to the case of algebraic batching.
}
}
\maketitle

%\setlength{\parskip}{5mm}


\begin{abstract}
This document is an informal summary on the FRI low degree test \cite{FRI}, \cite{ProximityGaps}, and DEEP algebraic linking from \cite{DEEPFRI}. 
Based on its most recent soundness analysis \cite{ProximityGaps}, we discuss parameter settings for practical security levels, how FRI is turned into a polynomial commitment scheme, and the soundness of DEEP sampling in the list decoding regime.
In particular, we illustrate the DEEP method applied to proving satisfiability of algebraic intermediate representations and prove a soundness error bound which slightly improves  the one in \cite{ethSTARK}.
\end{abstract}

%Keywords: SNARKs, recursive proofs, aggregation scheme

%\begin{KeepFromToc}
 \tableofcontents
%\end{KeepFromToc}

%\mainmatter
\section{Introduction}
FRI, in full length \textit{Fast Reed-Solomon Code Interactive Oracle Proof of Proximity}, is a low-degree test for functions on an FFT domain, i.e. a smooth multiplicative subgroup $D$ of a finite field $F$.  
Given a function 
\[
f: D\longrightarrow F
\]
FRI proves that $f$ corresponds to a polynomial of low degree with respect to the size of $D$. 

The oracles provided by the FRI prover are again functions on $D$, or a subdomain of it, and the verifier queries the values at points from their domain only.
Due to the small size of $D$ (compared to the cryptographically large sampling spaces of polynomial IOPs) the key tool for distinguishing one  polynomial from another is statistical sampling. 
However, a statistical test can only assure \textit{proximity}, which we measure by the fractional Hamming distance
\begin{equation*}
\delta(f, g) = \frac{1}{|D|}\cdot \big|\left\{x\in D: f(x)\neq g(x)\right\}\big|.
\end{equation*}
%(The smaller the distance the more samples are needed.) 
In FRI the prover convinces the verifier that a given function $f: D\longrightarrow F$ is \textit{$\theta$-close} (and not necessarily equal) to a low-degree polynomial, i.e.
\begin{equation*}
\delta(f, p) \leq \theta,
\end{equation*}
for some polynomial $p(X)$ of specified maximum degree.
In words, $f$ agrees with $p(X)$ on a set $A\subseteq D$ of density $\frac{|A|}{|D|} \geq 1-\theta$. 
In applications the agreement set is chosen large enough to infer global properties on the low degree polynomial. 
%This step is called \textit{algebraic linking} and will be explained in the course of this writeup. 
It is exactly this inference principle which makes FRI applicable to proving algebraic relations between a set of low-degree polynomials, might it be circuit satisfiability or the evaluation identities for building a polynomial commitment scheme.


\textit{We stress the fact that this summary does not present any novelties.}
Instead it is an outcome of my learnings when reading the papers \cite{ProximityGaps}, \cite{DEEPFRI}, \cite{FRI}, \cite{Redshift} and \cite{ethSTARK}. 
The document provides an overview of FRI and its soundness analysis, including some background on decoding Reed-Solomon codes.
It discusses the DEEP method and how it is related to polynomial commitment schemes and we sketch the more general notion of list polynomial commitment schemes \cite{Redshift}. 
Finally we illustrate how soundness error bounds are proven for the DEEP method in the list decoding regime. 
In the course of the latter we clarify two points of \cite{ethSTARK}, which are the usage of degree correction factors (these are not needed for the DEEP method), and the quadratic occurrence of the decoder list size bound in their soundness error formula, which can be replaced by a linear term.   

We assume that the reader knows (public-coin) interactive oracle proofs and their security notions  \cite{IOPs}, such as soundness, proof of knowledge, and statistical (i.e. perfect) honest verifier zero-knowledge. 
Any IOP with these security properties can be compiled into a succinct non-interactive argument of knowledge in the random oracle model \cite{IOPs}:
The prover oracle messages are committed by Merkle roots using the random oracle, and the verifier coins are the answers of the random oracle given the prover messages as its input.


\subsection{Notation}
Throughout the document we assume that the size of the sampling domain $D$ and the number of coefficients $k$ are both powers of two, and that the multiplicative group $F^*$ of the finite field $F$ is smooth enough to contain subgroups of order $k$ and $|D|$. 
The absolute Hamming distance between two functions $f,g\in F^D$ is
\begin{equation*}
\Delta(f, g) = \big|\left\{x\in D: f(x)\neq g(x)\right\}\big|,
\end{equation*}
and we shall write 
\[
\delta(f, g) = \frac{1}{|D|}\cdot \Delta(f,g)
\] 
for its fractional variant.
Given any subset $V\subseteq F^D$, we denote by
\[
\Delta(f, V) = \min_{v\in V} \Delta(f,v)
\]
the minimal distance of $f\in F^D$ to $V$, and likewise we define the minimal fractional Hamming distance.
We denote by
\begin{equation*}
\RS_k[F,D] = \big\{ \left.p(x)\right|_{x\in D} : p(X)\in F[X], \deg p(X) \leq k-1 \big\}
\end{equation*}
the Reed-Solomon code of rate $\rho = \frac{k}{|D|}$ over the domain of definition $D\subseteq F^*$. 
(Here, $p(x)|_{x\in D}$ denotes the domain evaluation, i.e.  the functional restriction of $p(x)$ to $D$.)
Whenever we say that a polynomial $p(X)$ belongs to $\RS_k[F,D]$, we mean that its domain evaluation $p(x)|_{x\in D}$ is a code word. 

In the context of oracle proofs, we denote oracles for  functions $f\in F^D$ by
$\big[ f \big]$, and occasionally call them \textit{domain evaluation oracles} to distinguish from the oracle notion of univariate polynomial IOPs \cite{DARK}, which models an ideal polynomial commitment scheme.
For a closer alignment with the compiled protocol in the random oracle model, we prefer to say that a party $P$ (the prover) ``sends'' $[f]$ to another party $V$ (the verifier), meaning that $P$ sets
up the oracle for $f$ and $V$ obtains oracle access for it.


\section{Correlated agreement}

As in polynomial IOPs, building random linear combinations is the core reduction argument in FRI. 
While the soundness of it is easily proven in the polynomial model, this is not the case for domain evaluations. 
Even in the most elementary case, proving that if with noticeable probability a random linear combination  of two  given functions $f_0$, $f_1$ is $\theta$-close to a Reed-Solomon codeword, i.e.
\[
\delta\big(f_0 + \lambda\cdot f_1, \RS_k[F,D]\big) \leq \theta,
\]
 then a similar proximity would hold for $f_0$ and $f_1$, is non-trivial, in particular when targeting only a small increase in the distance bound $\theta$.  
The most advanced result is the correlated agreement theorem (or \textit{proximity gap theorem}) of Ben-Sasson, et al. \cite{ProximityGaps}.
We state it for the case of algebraic curves, which is typically favored in the context of proof composition.

\begin{thm}
\label{thm:CorrelatedAgreement}
(Correlated agreement theorem, full version of \cite{ProximityGaps}, Theorem 6.1 and 6.2)
%in the full paper: Theorem 6.1 and 6.2.
Let $\RS_k = \RS_k[F,D]$ be the Reed-Solomon code over a finite field $F$ with defining set $D\subseteq F$ and rate $\rho=\frac{k}{|D|}$.
Suppose that for a proximity parameter $\theta\in (0,1-\sqrt\rho)$ and words $f_0$, $f_1$, \ldots, $f_{N-1}\in F^D$ we have
\begin{equation*}
\frac{%
	\Big|\Big\{\lambda\in F : \delta\big(f_0 + \lambda\cdot f_1 + \ldots + \lambda^{N-1}\cdot f_{N-1}, \RS_k\big) \leq \theta \Big\} \Big|
}{|F|} > \varepsilon,
\end{equation*}
where $\varepsilon$ is as in \eqref{e:epsilonU} and \eqref{e:epsilonJ}  below.
Then there exist polynomials $p_0(X)$, $p_1(X)$,...,$p_{N-1}(X)$ belonging to $\RS_k$, and a set $A\subseteq D$ of density 
$
\frac{|A|}{|D|}\geq 1 - \theta
$ 
on which $f_0, \ldots, f_{N-1}$ jointly coincide with $p_0, \ldots, p_{N-1}$, respectively. 
In particular, 
\begin{equation*}
\delta\big(f_0 + \lambda\cdot f_1 + \ldots + \lambda^{N-1}\cdot f_{N-1}, \RS_k\big) \leq \theta
\end{equation*}
for every $\lambda\in F$. 
\end{thm}

The proof of the correlated agreement theorem, including concrete values for the soundness error bound $\varepsilon$, is an algebraic analysis of the Berlekamp-Welch or the Guruswami-Sudan list decoder over the rational function field $K=F(Z)$. 
It uses the Polishchuk-Spielman lemma to ``glue together'' the outputs of the decoder for $f_0 + \lambda\cdot f_1 + \ldots +\lambda^{N-1}\cdot f_{N-1}$ over the ``small'' field $F$ by means of the decoder result for the word 
\[
f_0 + Z\cdot f_1 + \ldots + Z^{N-1}\cdot f_{N-1} \in K^D
\]
over the infinite field $K$: 
If for a noticeable fraction of  $\lambda$'s the distance to the Reed-Solomon code is $\leq\theta$, then the same holds over $F(Z)$.
%(The analysis of the Berlekamp-Welch decoder is much more easy than in the list decoding case, see \cite{ProximityGaps}.)

Depending on the decoding regime the following values for $\varepsilon$ are obtained by \cite{ProximityGaps}:
\begin{enumerate}
\item
\textit{Unique decoding regime.} 
For $\theta\in \left(0,\frac{1-\rho}{2}\right]$, Theorem \ref{thm:CorrelatedAgreement} holds with
\begin{equation}
\label{e:epsilonU}
\varepsilon = (N-1)\cdot \frac{|D|}{|F|}.
\end{equation}
\item
\textit{List decoding regime.} 
For $\theta\in \left(\frac{1-\rho}{2},1-\sqrt\rho\right)$ and setting $\theta =1-\sqrt\rho \cdot\left(1 +\frac{1}{2m}\right)$, with $m\geq 3$, Theorem \ref{thm:CorrelatedAgreement} holds with
\begin{equation}
\label{e:epsilonJ}
\varepsilon =  (N-1)\cdot \frac{\left(m + \frac{1}{2}\right)^7}{3\cdot \rho^{\frac{3}{2}} } \cdot \frac{|D|^2}{|F|}.
\end{equation}
%\[
%\varepsilon =  (N-1)\cdot \frac{k^2}{|F|\cdot \min\left(\frac{1}{m}, \frac{\sqrt\rho}{10}\right)^7}
%\approx (N-1)\cdot m^7\cdot \rho^{-\frac{3}{2}} \cdot \frac{|D|^2}{|F|}.
%\]
\end{enumerate}

For linear varieties of the form $f_0 + \lambda_1\cdot f_1+  \ldots + \lambda_{N-1}\cdot f_{N-1}$ a similar result holds, with the $(N-1)$-term in \eqref{e:epsilonU} and \eqref{e:epsilonJ} replaced by $1$. 
See the full version of \cite{ProximityGaps}, Theorem 4.1 and 5.1. 

Note that in contrast to the unique decoding regime, the sampling domain size $|D|$ occurs quadratically in the error bound, and therefore the field needs to be significantly larger to obtain the same magnitude of soundness as in the unique decoding regime.  
This quadratic occurrence is inherently connected with the Guruswami-Sudan-Johnson list size bound. 
It is conjectured by \cite{DEEPFRI} that Reed-Solomon codes over prime fields $F$ are more ``nicely'' list decodable, even up to capacity bound $1-\rho$, and that the sampling domain size occurs only linearly in the error bound. 
We will discuss this conjecture in Section \ref{s:RSConjecture}. 


\section{FRI proof of proximity}

%Let $D$ be a multiplicative subgroup of a finite field $F$, $|D|=2^m$, and let be $k$ a non-trivial factor of $|D|$. 
Given a function $f\in F^D$ and its  domain evaluation oracle 
$
[f(x)|_{x\in D}],
$ 
FRI is an interactive oracle proof for $f$ being close to a word from $\RS_k[F,D]$, 
\[
\delta( f, \RS_k[F,D]) \leq \theta,
\]
given a \textit{proximity parameter} $\theta$ of at most the Johnson list decoding bound. % $\theta < 1-\sqrt\rho$.
%
Like most interactive oracle proofs, the FRI protocol is comprised of a \textit{commit phase} and a \textit{query phase}.
The commit phase consists of one or several rounds, in which the prover sends domain evaluation oracles to the verifier, who then responds with a random challenge.
That phase of FRI performs a random reduction similar to the one of an inner product argument \cite{BootleGroth}, at least halving the instance size with each step by a linear folding procedure.
In the concluding query phase, the verifier asks for openings of the oracles at random points from their domain of definition. 
These openings are then used to check consistency of each reduction step of the commit phase.


\subsection{Reduction}


The commit phase of FRI starts with the instance to be proven, i.e. the polynomial $p_0(X)=p(X)$ and its domain evaluation over $D_0=D$.
This instance is reduced stepwise by means of a random folding procedure, yielding a sequence of polynomials 
\[
p_0(X), p_1(X), \ldots , p_r(X)\in  F[X]
\]
as words over the domains 
\[
D_0\supseteq D_1\supseteq \ldots\supseteq D_r,
\]
respectively, whereas their degree bounds $k_i$, $\deg p_i(X) < k_i$, decrease with the same ratio as the domains.
% where both the degree bounds $\deg p_i(X) < k_i$ as well as the domain size $|D_i|$ at least halve in each step of the reduction.  
The quotients 
\[
a_i = \frac{k_{i-1}}{k_i} = \frac{|D_{i-1}|}{|D_i|} 
\]
are the \textit{reduction factors}, and we throughout assume that $a_i\geq 2$. (By our assumptions on $|D|$ and $k$ the $a_i$ are again powers of two.)
The number of rounds $r\geq 1$,  their reduction factors $a_1,\ldots, a_r$ and therefore the decreasing sequence of domains $D_0,\ldots, D_r$, are parameters of FRI.

\begin{protocol}[FRI commit phase]
Given the domain evaluation $[p_0(x)|_{x\in D_0}]$ for the polynomial $p_0(X)\in F[X]$, $\deg p_0(X) < k_0$, the commit phase consists of the following $r$ rounds.
\begin{itemize}
\item
In each round $i$, $1\leq i\leq r$, the prover decomposes the previous polynomial $p_{i-1}(X)$ of $\deg p_{i-1}(X) < k_{i-1}$,  
%(In the first round $p_0(X)=p(X)$ and $k_0 =k$.)
according to
\begin{equation}
\label{e:FRIdecomposition}
p_{i-1}(X) = F_0(X^{a_i})+ X\cdot F_1(X^{a_i})+  \ldots +X^{a_i-1} \cdot F_{a_i-1}(X^{a_i}),
\end{equation}
where each 
\[
\deg F_j(Y) <  \frac{k_{i-1}}{a_i} = k_i, \quad j=0,\ldots,a_i-1.
\] 
(For $a_i=2$ this is the decomposition into odd and even parts.)
The verifier samples a random challenge $\lambda_i\sample F$, sends it to the prover, which in turn responds with the linear combination
\begin{equation*}
p_i(Y)=F_0(Y)+ \lambda_i \cdot F_1(Y)+  \ldots + \lambda_i^{a_i-1} \cdot F_{a_i-1}(Y)
\end{equation*}
as a word on the reduced domain $D_i= D_{i-1}^{a_i} =\{x^{a_i}: x\in D_{i-1}\}$. 
That is, it sends
$
[p_i(y)|_{y\in D_i}]
$ 
to the verifier. 
In the last step however, $i=r$, the polynomial 
$p_r(X)\in F[X]$
is revealed in full length instead.
\end{itemize}
\end{protocol}

Let us elaborate on the decomposition \eqref{e:FRIdecomposition} in terms of the reduction map
\[
\pi_i: D_{i-1}\longrightarrow D_i, \quad x\mapsto x^{a_i}.
\] 
Notice that for each $y$ in $D_i$, $y= x^{a_i}$, the values of $F_0(y)$,\ldots, $F_{a_i-1}(y)$ are uniquely determined by the values of 
\[
F_0(y)+ F_1(y)\cdot X+\ldots+F_{a_i-1}(y)\cdot X^{a_i-1} 
\] 
on the coset $\pi_i^{-1}(y)=x\cdot\ker(\pi_i)$, and these values are exactly the ones given by $p_{i-1}(X)$.
Hence if $\tau$ is a generator of $\ker(\pi_i)=\{1,\tau,\ldots, \tau^{a_i-1}\}$, then
\begin{equation*}
\begin{aligned}
p_i(\pi_i(x)) &= L_0\left(p_{i-1}\left(\tau^0\cdot x\right),\ldots, p_{i-1}\left(\tau^{a_i-1}\cdot x\right)\right) 
\\
&+ 
\lambda_i\cdot L_1\left(p_{i-1}\left(\tau^0\cdot x\right),\ldots, p_{i-1}\left(\tau^{a_i-1}\cdot x\right)\right) 
\\
&+ \ldots
\\
& + \lambda_i^{a_i-1}\cdot L_{a_i-1}\left(p_{i-1}\left(\tau^0\cdot x\right),\ldots, p_{i-1}\left(\tau^{a_i-1}\cdot x\right)\right) 
\end{aligned}
\end{equation*}
where $(L_0,\ldots,L_{a_i-1})$ is the Lagrange interpolation map for the coset $x\cdot \ker(\pi_i)$. 
In other words, 
\begin{equation}
\label{e:FRIconsistency}
p_i(\pi_i(x)) = \FFT_{\lambda_i / x}\left(p_{i-1}\left(\tau^0\cdot x\right),\ldots, p_{i-1}\left(\tau^{a_i-1}\cdot x\right)\right),
\end{equation}
that is the Fourier transform of the vector $\big(p_{i-1}\left(\tau^0\cdot x\right),\ldots, p_{i-1}\left(\tau^{a_i-1}\cdot x\right)\big)$, evaluated at $\frac{\lambda_i}{x}$.
This equation will be used to check consistency between the provided oracles. 
% FFT(p_{i-1}(X)|_{x\cdot ker})
% evaluated at lambda / x
%

In some situations it is more efficient to compute the values of $p_i(y)$ over $D_i$ directly from the ones of $p_{i-1}(x)$, $x\in D_{i-1}$, using \eqref{e:FRIconsistency}. 
In terms of field additions $\mathsf A$, multiplications $\mathsf M$,  and FFT operations $\FFT(a_i)$ of size $a_i$, 
this can be done\footnotemark in
\footnotetext{%
Using batch inversion to compute $\frac{\lambda_i}{x}$ over $D_i$ costs $2\cdot |D_i|\:\mathsf M$, computing the fiber FFT's costs $|D_i|\cdot a_i\cdot \log(a_i) \cdot (\mathsf M +\mathsf A)$, and evaluating them another $|D_i|\cdot (a_i - 1)\cdot (\mathsf M + \mathsf A)$.
}% 
% Computing \lambda_i / x for every x in |D_i| costs
%	2 * |D_i| M,
% using batch inversion. Computing the FFT's for each fiber costs
%    |D_i| * a_i * log(a_i) (M + A),
% and evaluating at \lambda_i/x costs another
%  |D_i| *  (a_i - 1) * (M + A),
% using the Horner scheme. This leads to overall
% |D_i| * ( 1 + a_i + a_i*log a_i) M + |D_i| * ( -1 + a_i + a_i*log a_i) A.  
\[
|D_i|\cdot \big((a_i + 1 + a_i\cdot\log a_i)\:\mathsf M +  (a_i - 1 + a_i\cdot\log a_i)\:\mathsf A\big)\approx |D_i|\cdot (a_i + 1 + a_i\cdot\log a_i)\:\mathsf M,
\]
compared to 
% Taking the random linear combination of the coefficient vectors costs
% a_i * k_i = a_i * |D_i| (multiplications + additions),
% the domain evaluation costs a single FFT(D_i), assuming no coset here.
\[
a_i \cdot k_i \; (\mathsf M + \mathsf A) + 
\FFT(|D_i|) \approx |D_i|\cdot (a_i + \log_2 |D_i|) \:\mathsf M
\] 
when computing the domain evaluation of the random linear combination $p_i(X)$.
Hence using equation \eqref{e:FRIconsistency} is more efficient whenever
\begin{equation}
1 + a_i\cdot \log_2(a_i) < \log_2 |D_i|,
\end{equation}
which holds for most reduction steps  when $a_i=2$. 
For $a_i = 2^2$ and $a_i = 2^3$ we already obtain $|D_i|>2^{9}$ and $|D_i| > 2^{25}$, respectively.   
However, it should be noticed that these counts do not take into account that equation \eqref{e:FRIconsistency} is better parallelizable than the FFT approach. 

 
\subsection{Sampling phase}

In the query phase the verifier samples at random points from the defining domains of the oracles, and uses the returned values to check the consistency of all reduction steps.
\begin{protocol}[FRI query phase]
The query phase consists of $s\geq 1$ many rounds. 
\begin{itemize}
\item
In each round the verifier samples an $x_0 \in D_0$ uniformly at random, computes $x_1, \ldots, x_r$ recursively via  $x_{i}=\pi_{i}(x_{i-1})$, and checks if
\[
p_i(x_i)= \FFT_{\lambda_i / x_{i-1}}\big(p_{i-1}(x_{i-1}), p_{i-1}(\tau\cdot x_{i-1}), \ldots, p_{i-1}(\tau^{a_i-1}\cdot x_{i-1})\big),
\]
for every $i=1,\ldots,r$, by querying  the values of each $p_{i-1}$  over the coset $x_{i-1}\cdot\ker\pi_i$.
\end{itemize}
\end{protocol}

Notice that unlike in \cite{ProximityGaps} we choose $x_0$ uniformly from $D_0$, and form the $x_i$ by projecting $x_{i-1}$ onto $D_i$. 
In distribution, this way of sampling is equivalent to the one in the paper, which starts with $x_r\sample D_r$,  and then samples $x_{i-1}$ uniformly from the coset  $\pi_i^{-1}(x_i)$.

\subsection{Batching}
As for linear polynomial commitment schemes, batching is done via random linear combinations. 
We will only discuss the algebraic variant, which uses powers of a single random challenge. 
(Again, this is the one favored in the context of proof composition.)

Given a batch of $L$ low-degree polynomials $q_0(X)$, \ldots, $q_{L-1}(X)$, the verifier samples a random challenge $\lambda\sample F$. 
The prover computes the linear combination
\begin{equation}
\label{e:FRIbatchPoly}
h(X) = \sum_{i=0}^{L-1} \lambda^i\cdot q_i(X),
\end{equation}
sends the oracle of it,
\[
[h(x)|_{x\in D}],
\]
to the verifier.
Then both prover and verifier continue with FRI for $h$. 
Each $x_0\sample D_0=D$ from the query phase of FRI is used to additionally check consistency between the oracle for $h(X)$ and the ones in the batch, $q_0(X),\ldots, q_{L-1}(X)$, using \eqref{e:FRIbatchPoly}. 

%As for a FRI reduction step, the soundness error of the batching is
%\[
%\varepsilon_B =(L-1)\cdot \varepsilon, 
%\]
%where $\varepsilon$ is the error in the linear variant of the correlated agreement theorem.

\subsection{Soundness}


%\subsubsection{Proven soundness error}

%The proof of the soundness error for FRI is as for Lemma 8.2 in Ben-Sasson, et al., 2020, replacing the error bound of the affine batching step by the algebraic curve variant:

%\begin{prop}[Commit phase soundness error] 
%Suppose that the functions $q_i(x)$, $i=0,...,L-1$, have correlated agreement with $\RS_k$ on a set of density of at most $\alpha =\left(1+\frac{1}{2m}\right)\cdot\sqrt\rho$, for $m\geq 3$. 
%Then except with probability
%\[
%\left(L-\frac{1}{2}\right) \cdot \frac {\left(m+ \frac{1}{2}\right)^7}{2\cdot\sqrt\rho^3}\cdot \frac{|D_0|^2}{|F|} 
%+ \frac{(2m+1)\cdot (|D_0|+1)\cdot \sum_{i=1}^{r} a_i}{|F|}
%\]
%in the randomnesses of the commit phase, the 
%probability for passing the query phase is at most $\alpha$.  
%\end{prop}
%
%As a consequence we have the following soundness error for batched FRI. This is Theorem 3.8 in Ben-Sasson, et al., 2020, adapted to the algebraic batching case.

The soundness analysis of FRI is based on a strengthening of the correlated agreement theorem, which allows to additionally keep track of the success probability for the FRI query phase by a sub-probability measure $\mu$.
We state that \textit{weighted correlated agreement theorem} in Appendix \ref{s:WeigthedCorrelatedAgreement}.
%For brevity we skip the explicit formulation of that weighted correlated agreement theorem, and refer to the full version of \cite{ProximityGaps}, Section 7.
For proximity parameters close to the Johnson bound, the soundness error of the batched FRI oracle proof is as follows\footnotemark:
\footnotetext{%
We would like to thank Paul Gafni for pointing out a typo in formula \eqref{e:EpsilonFRI} in a previous version of the document.
}

\begin{thm}[Batched FRI soundness error, full version of \cite{ProximityGaps}, Theorem 8.3]
\label{thm:BatchedFRISoundness}
Suppose that $q_i\in F^D$, $i=0,\ldots,L-1$, is a batch of functions given by their domain evaluation oracles.
 If an adversary passes batched FRI for $\RS_k[F,D]$ and proximity parameter $\theta =1-\sqrt\rho \cdot \left(1+\frac{1}{2m}\right)$, $m\geq 3$, with a probability larger than
\begin{equation}
\label{e:EpsilonFRI}
\begin{aligned} 
\varepsilon = \left(L-\frac{1}{2}\right) \cdot \frac {\left(m+ \frac{1}{2}\right)^7}{3\cdot\sqrt\rho^3}\cdot \frac{|D_0|^2}{|F|} 
+ \frac{(2m+1)\cdot (|D_0|+1)\cdot \sum_{i=1}^{r} a_i}{\sqrt{\rho}\cdot |F|} \quad\quad
\\
+(1-\theta)^s ,
\end{aligned}
\end{equation}
 then the functions $q_i\in F^D$, $i=0,\ldots,L-1$, have correlated agreement with $\RS_k[F,D]$ on a set of density of at least $\alpha >\left(1+\frac{1}{2m}\right)\cdot \sqrt\rho$.
\end{thm}
\begin{rem}
The case of linear batching of two functions $q_0$, $q_1$ corresponds to the case $L=2$, in which $L-\frac{1}{2}=\frac{3}{2}$. 
The same is true for affine batching of several functions: its soundness error is obtained from \eqref{e:EpsilonFRI} by replacing $L-\frac{1}{2}$ by $\frac{3}{2}$, see \cite{ProximityGaps}.
\end{rem}
%For proximity parameters below 

The first two terms in \eqref{e:EpsilonFRI},
\[
\varepsilon_C= \left(L-\frac{1}{2}\right) \cdot \frac {\left(m+ \frac{1}{2}\right)^7}{3\cdot\sqrt\rho^3}\cdot \frac{|D_0|^2}{|F|} 
+ \frac{(2m+1)\cdot (|D_0|+1)\cdot \sum_{i=1}^{r} a_i}{\sqrt{\rho}\cdot |F|},
\] 
correspond to the soundness error of the commit phase, reflecting the systematic error estimated by the correlated agreement theorem and collected over the batching step and the reduction rounds.
In words, 
%if an adversary passes the commit phase such that with a probabability of at least $\varepsilon_C$ (w.r.t. the challenges of the commit phase) the oracles satisfy all consistency checks on a set of density of at least $\alpha = 1-\theta$, then the batch must have correlated agreement with agreement density $\geq \alpha$. 
if the oracles in the batch do not share the claimed correlated agreement for $\alpha = 1-\theta$, then except with probability $\varepsilon_C$,  the oracles produced during the commit phase cannot be ``nice''. 
That is, the set where all consistency checks would hold is \textit{at most} of density $\alpha$.
The remaining term, 
\[
\varepsilon_Q = (1-\theta)^s,
\]
is the soundness error of the query phase with $s$ rounds.
This is the probability not to detect such a set of non-``nice'' oracles using $s$ independent samples. %do not satisfy all consistency checks on a set of density $\geq \theta$.


%We point out that for $\theta\leq \frac{1-\rho}{2}$ the soundness error of the commit phase phase is significantly smaller. 


\subsection{Example parameters}

One way to settle the parameters is as follows. 
For target security level $2^{-\lambda}$, we assure that 
\begin{enumerate}
\item
the soundness error for the commit phase is bounded by  $\frac{1}{2}\cdot 2^{-\lambda}$. 
For that we choose the maximum Johnson proximity $m\geq 3$ so that 
\[
\begin{aligned}
\varepsilon_C \leq \frac{1}{2}\cdot 2^{-\lambda},
\end{aligned}
\]
\item
the soundness error of the query phase is bounded by $\frac{1}{2}\cdot 2^{-\lambda}$. 
Using $m$ from the first step, we determine the number $s$ of query rounds via
\[
\varepsilon_Q = \sqrt\rho^s \cdot \left(1 + \frac{1}{2m}\right)^s \leq \frac{1}{2}\cdot 2^{-\lambda}.
\]
\end{enumerate}


The following examples\footnotemark consider a situation similar to the one in plonky2 \cite{PolygonZero}.
\footnotetext{%
In a previous version of the document the example parameters were based on linear batching of FRI. 
The current ones consider algebraic batching.
} 
We take extensions $F$ of a base field of size $|F_b|=2^{64}$, and sampling domain sizes $|D_0|=2^{12}\cdot\rho^{-1}$, where we vary the blow-up factors $\rho^{-1}$ to the maximum possible for the given security level.    
The number of polynomials is taken as $L= 300$, and we assume that these are grouped into 
\[
\{100,100,100\} 
\]
polynomials, each group committed by a single tree using Merkle caps.
The height for the Merkle caps is chosen to minimize the proof size.
For each blow-up factor we compute the proof size (assuming a hash size of $256$ bits), and as a very coarse measure for the prover complexity the number of hashes\footnotemark it needs to compute. 
\footnotetext{%
Each call of the hash processes another $r=256$ bits. 
}

\subsubsection{67 bits of security}


Such a configuration might be still interesting in practice, as its security can be increased by \textit{grinding} (see \cite{ethSTARK}): 
Adding another $13$ bits of proof of work, bound to the proof generation, one obtains overall $80$ bits of security. 

\begin{itemize}
\item
With a degree $2$ extension of $F_b$, hence a field size of $128$ bits, the best security level one can obtain for $\rho=2^{-5}$ is about $67$ bits. 
The commit phase error is 
\[
\varepsilon_C\approx 2^{-68.21},
\]
with Johnson proximity $m=3$. 
To have about the same soundness error in the query phase, we demand $s=30$ samples, yielding 
\[
\varepsilon_Q \approx 2^{-68.33}.
\] 
With a reduction strategy $\{a_1,a_2\}=\{2^4,2^3\}$ we obtain proof sizes of about $107$ kB. 
\begin{center}
\begin{tabular}{|c|c|c|c|c|}
\hline
$-\log_2(\rho)$ & $m$	& $s$	& $T$ in hashes 	& $|\pi|$ in bytes
\\\hline\hline
$3$ 	& $6$	& $50$	& $17.4$ k 	& $170.2$ k
\\
$4$ & $4$	& $38$	& $34.8$ k & $132.9$ k
\\
$5$ & $3$	& $30$	& $69.6$ k 	& $107$ k
\\\hline
\end{tabular}
\end{center}

\item
With a degree $3$ extension of $F_b$, hence a field size of $192$ bits, one can choose higher blow-up factors. 
For  $\rho =2^{-6}$ we obtain $67$ bits security by  
\[
\varepsilon_C\approx 2^{-68.00},
\]
where the Johnson proximity is $m=1,427$. 
To have about the same soundness error in the query phase, we need only $s=23$ samples, yielding 
\[
\varepsilon_Q\approx 2^{-68.99}.
\]
With the same reduction strategy as before, we reduce the proof size down to $89$ kB. 
However, this comes at the cost of about tripling the prover cost. 
\begin{center}
\begin{tabular}{|c|c|c|c|c|}
\hline
$-\log_2(\rho)$ & $m$	& $s$	& $T$ in hashes 	& $|\pi|$ in bytes
\\\hline\hline
$6$ 	& $1,427$	& $23$	& $209$ k 	& $88.9$ k
\\
$8$ & $713$	& $18$	& $836$ k & $72.4$ k
\\
$10$ & $356$	& $14$	& $3,342$ k 	& $58.4$ k
\\\hline
\end{tabular}
\end{center}

\end{itemize}


\subsubsection{112 bits security}

As in the previous setting, we discuss this level of security as it can be improved by grinding, typically up to $128$ bits. 
All configurations use degree $3$ extensions of $F_b$.
\begin{center}
\begin{tabular}{|c|c|c|c|c|}
\hline
$-\log_2(\rho)$ & $m$	& $s$	& $T$ in hashes 	& $|\pi|$ in bytes
\\\hline\hline
$6$ 	& $16$	& $39$	& $209$ k 	& $149$ k
\\
$8$ & $7$	& $29$	& $836$ k & $115$ k
\\
$10$ & $3$	& $24$	& $3,342$ k 	& $99$ k
\\\hline
\end{tabular}
\end{center}
For higher blow-up factors, one needs to increase grinding. 
For example, for $-\log_2(\rho) = 11$ the best level of security that can be obtained with degree $3$ extensions is $109$ bits, leaving  $19$ bits for grinding. 
The proof size decreases down to $88.2$ kB, at the prover cost of $6,684,672$ hashes.  


\subsubsection{128 bits security}

These configurations do not use grinding, and hence have quite large proof sizes.
Again, we use degree $3$ extensions of $F_b$.

\begin{center}
\begin{tabular}{|c|c|c|c|c|}
\hline
$-\log_2(\rho)$ & $m$	& $s$	& $T$ in hashes 	& $|\pi|$ in bytes
\\\hline\hline
$3$ 	& $9$	& $91$	& $26$ k 	& $322$ k
\\
$4$ & $6$	& $69$	& $52$ k & $250$ k
\\
$5$ & $4$	& $56$	& $104$ k  &  $208$ k
\\\hline
\end{tabular}
\end{center}


\subsection{Conjectured security}
\label{s:FRIConjecture}

In their line of work on FRI \cite{FRI, DEEPFRI, ProximityGaps} the authors make several conjectures on the soundness of FRI for proximity parameters above the Johnson bound. 
In the most recent one, they state the following.

\begin{conj}[Full version of \cite{ProximityGaps}, Conjecture 8.4]
\label{con:FRIsoundness}
There exist constants $c_1$, $c_2$ such that for all $\theta =1-\rho -\eta$, $\eta >0$,  the soundness error in the correlated agreement theorem on $f_0,\ldots ,f_{N-1}$ is bounded by
\[
\varepsilon \leq \frac{1}{(\eta\cdot\rho)^{c_1}} \cdot \frac{(N\cdot n)^{c_2}}{|F|}.
\]
\end{conj}
\begin{rem}
For purely linear batching, a similar conjecture is stated.
\end{rem}

We point out that the above conjecture (as well as its corresponding one in \cite{FRI}) is stated in isolation from any general conjectured properties of Reed-Solomon codes, such as list decodability up to the capacity bound (as done for the DEEP method, see Section~\ref{s:RSConjecture}). 
Instead it is rather justified by ``\textit{[to the best of our knowledge...] nothing seems to contradict}''.
The authors consider the choice of $c_1=c_2=2$ reasonable, and for fields of characteristic $q>n$ they estimate that $c_1=c_2=1$. 
%For characteristics $q<n$, a counter example from Appendix B of  \cite{DEEPFRI} contradicts the latter choice of constants.  

The $c_1=c_2=1$ assumption is of particular interest for practitioners, as it yields proofs of half the size compared to the $c_1=c_2=2$ case. For example, it is used by the ethSTARK \cite{ethSTARK} (besides its provably secure parameter setting), as well as by plonky2 \cite{PolygonZero}.
%, i.e. their standard recursive configuration, their high rate recursive configuration, as well as an even higher rate config for their final proofs). 


\subsection{Adding zero-knowledge}

Zero-knowledge for FRI has to be provided on application level. 
In our use cases, the witnesses of an argument correspond to the values of some polynomial $q(X)$ on a given domain $H$ (the proving domain for Plonk, say). 
To protect it from being leaked by the queries of the $s$ query rounds (as well as by the final reduction polynomial), one uses an $H$-disjoint \textit{coset} $a\cdot D$ of the FRI domain, and randomizes $q(X)$ outside the domain $H$. 
That is, the batching and the entire FRI reduction takes place on 
\[
a\cdot D_0\supseteq a\cdot D_1 \supseteq \ldots \supseteq a\cdot D_{r}, 
\]
instead of $D_0\supseteq D_1 \supseteq \ldots \supseteq D_r$, where $(a\cdot D_0) \cap H=\emptyset$.
This leads to running batched FRI for $q_i(X)$, $i=0,\ldots, L-1$, over the non-shifted domain $D_0$ on the shifted polynomials
\[
q_i(a\cdot X), 
\]
$i=0,\ldots,L-1$, instead.

The number of linear functionals of a polynomial $q_i(X)$ revealed in the course of a single FRI query is composed as follows:  
one in the batching step, $a_i$ many for the coset evaluations in each of the reduction steps $i=1,\ldots,r-1$, and 
\[
1+ \deg p_r(X) = \rho\cdot |D_r| =  \rho\cdot\frac{ |D_0|}{a_1\cdot a_2\cdot \ldots \cdot a_{r-1}}
\] 
linear functionals corresponding to the coefficients of the final reduction polynomial.
With $s$ queries this leads to overall
\begin{equation}
b=s\cdot (1+ \sum_{i=1}^{r-1} a_i + \rho\cdot \frac{|D_0|}{a_1\cdot a_2\cdot \ldots \cdot a_{r-1}})
\end{equation}
linear functionals. To reduce this number, one can add a blinding polynomial 
\[
h(x) \in \RS_k[F,D] 
\]
to the batch (coming with the cost of an extra commitment). 
Then the number of linear functionals revealed on a witness polynomial is reduced to $b= s$. 


In both cases, the randomization can be done without moving beyond $|H|-1$ in degree whenever a subset $B\subseteq H$ with $|B|=b$ remains ``unused'', i.e. unconstrained: 
Instead of taking 
\[
p(X)= p(X) + r(X)\cdot v_H(X),
\]
where $v_H(X)$ is the vanishing polynomial of $H$ and $r(X)$ a random polynomial of $\deg r(X)= b - 1$, one takes $p(X)$ as the polynomial interpolated from the witness values on $H\setminus B$ and randomly chosen values on $B$.


\section{FRI as a polynomial commitment scheme}
\label{ch:Polycommit}

FRI can be turned into a polynomial commitment scheme by means of the evaluation quotients
\begin{equation*}
h(x)=\frac{f(x)-v}{x-z}
\end{equation*}
of a committed word $f\in F^D$. 
This approach, called the DEEP method in \cite{DEEPFRI}, corresponds to the algebraic linking of the evaluation identity
\begin{equation*}
f(X) = v + h(X)\cdot (X-z)
\end{equation*}
with a low-degree problem on the sampling domain $D$, assuming that $z\notin D$. 
(For $z\in D$ the oracle can directly answer with the queried value. We will omit this case throughout our discussion.)

For proximity parameters $\theta$ up to the unique decoding radius one obtains a polynomial commitment scheme in the classical sense (when compiling the oracle proof into an argument using a secure partially disclosable vector commitment). 
In the list decoding regime the situation is a bit more subtle due to the non-uniqueness of $\theta$-close code words. 
In this case the DEEP method can be viewed as an oracle proof for a more general type of polynomial commitment scheme, called \textit{list polynomial commitment scheme} in \cite{Redshift}. 
However, as their notion does not cover the power of correlated agreement, we shall only sketch list polynomial commitment schemes.% and devote a separate section for DEEP algebraic linking.

\subsection{In the unique decoding regime}
For a proximity bound up to the unique decoding radius, i.e. $\theta < \frac{1-\rho}{2}$, the situation is quite simple. 
However, there are several ways to algebraically link the evaluation identity with a low-degree test. 

\subsubsection{A first construction}
\label{s:naivePC}

We first discuss a naive scheme, in which the maximum degree corresponds to the degree proven by FRI.
\begin{itemize}
\item 
\textit{Setup:}
The maximum degree $d=k-1$ is chosen as the maximum degree of polynomials belonging to $\RS_k[F,D]$.
%The size of the sampling domain $S$ is then taken such that $n=|S| = \beta\cdot k$, where the blowup factor is a power of two. 
%Notice that $\rho=\frac{k}{n}$ is the rate of the Reed-Solomon code 
%$\RS_k=\{p(x)|_{x\in S}: p(X)\in F[X], \deg p(X) \leq d\}$.

\item
\textit{Commit:} 
Given a polynomial $p(X)$ of degree $\deg p(X)\leq d$, the prover commits its domain evaluation over $D$, i.e. 
\[
\comm(p(X))= [p(x)|_{x\in D}].
\]
%where any partially disclosable vector commitment can be used. 

\item
\textit{Evaluation proof: }
%For any $z$ belonging to the evaluation domain, the opening value $v$ is directly provided by the oracle.
Given an opening claim $(z,v)$ with $z\notin D$, the prover engages with the verifier in a batched FRI argument on
\begin{align*}
f_1(x) &= \frac{p(x)-v}{x-z},
\\
f_2(x) &=x\cdot f_1(x)= x\cdot \frac{p(x)-v}{x-z},
\end{align*}
with proximity bound $\theta = \frac{1-\rho}{2}$. 
This proof batches the functions into a random linear combination $f_1(x)+ \lambda\cdot f_2(x) =(1+\lambda \cdot x)\cdot \frac{p(x)-v}{x-z}$, and then runs FRI on it. 
The linear term $\lambda \cdot x$ is called \textit{degree correction factor}.
\end{itemize}

We point out that the two functions $f_1$, $f_2$ are not needed to be provided by another oracle, as their evaluations on $D$ can be computed from the values of $p(x)$.

Let us discuss that the evaluation proofs in fact provide a view on a unique polynomial of degree $\leq d$, determined by the values committed in $[p(x)|_{x\in D}]$.
First of all, if the prover passes with a probability $p$ greater than the soundness error of batched FRI on $f_1, f_2$ as above,  then there exist two polynomials $p_1(X)$, $p_2(X)$ of degree $\leq d$, and a correlated agreement set $A$ of density $1-\theta \geq \frac{1+\rho}{2}$ such that
\begin{align*}
f_1(x) &=p_1(x) \big|_{x\in A},
\\
x\cdot f_1(x) &= p_2(x)\big|_{x\in A},
\end{align*}
and hence also $x\cdot p_1(x) =p_2(x)\big|_{x\in A}$. 
As the density of $A$ is strictly greater than $\rho$, the polynomial $X\cdot p_1(X)-p_2(X)$ has at least $k+1=d+2$ zeroes and hence must be trivial, i.e. $X\cdot p_1(X) = p_2(X)$. 
This implies that $\deg p_1(X)\leq d-1$, and hence $p(x)$ coincides on $A$ with the degree $d$ polynomial 
\[
P(X)= v + (X-z)\cdot p_1(X),
\]
which evaluates to $v$ at $z$. 
Notice that $\delta(p(x),P(X)) < \frac{1-\rho}{2}$, hence a single evaluation proof implies distance to a degree $\leq d$ polynomial of at most the unique decoding radius. 
As a consequence, any other evaluation proof (on the same or any other query) is consistent with that unique degree $\leq d$ polynomial, showing that we indeed have a polynomial commitment scheme.
%Suppose that $P_1(X)$ and $P_2(X)$ are degree $d$ polynomials corresponding to the queries $(x_1,v_1)$, $(x_2,v_2)$ as argued above, and let $A_1$, $A_2$ be their agreement set with $p(x)$. As
%\[ 
%\frac{|A_1\cap A_2|}{|S|} \geq 1-2\cdot\frac{1-\rho}{2}=\rho,
%\]
%we conclude the formal identity $P_1(X)=P_2(X)$. 

\subsubsection{The refined scheme}

By similar reasoning (based on a degree $k=d+1$ polynomial vanishing on a set of density $>\rho$)  we can  remove the degree correction factor in the above naive scheme, running FRI for a proximity parameter $\theta < \frac{1-\rho}{2}$, only on the evaluation quotient of the claim: 
For any two evaluation claims $(z_1,v_1)$ and $(z_2, v_2)$ we conclude the existence of polynomials $p_1(X)$, $p_2(X)$ of degree $\leq k-1$ and sets $A_1$, $A_2$ of density $1-\theta > \frac{1+\rho}{2}$ such that
\[
\begin{aligned}
v_1 + (X-z_1)\cdot p_1(X),
\\
v_2 + (X-z_2)\cdot p_2(X),
\end{aligned}
\]
agree with $p(x)$  on $A_1$ and $A_2$, respectively.
Since the density of $A_1\cap A_2$ is at least $1 - 2\cdot \theta > \rho$, it contains at least $k + 1$ points, and by degree we may conclude the formal identity
\[
v_1 + (X-z_1)\cdot p_1(X) = v_2 + (X-z_2)\cdot p_2(X).
\]

%Let $(z',v')$ be any further claim than the first one $(z,v)$, from which we concluded a degree at most $d+1$ polynomial 
%\[
%P(X)= v+ (X-z)\cdot p_1(x),
%\]
%agreeing with $p(x)$ on a set $A$ of density $\geq\frac{1+\rho}{2}$. 
%Since $\frac{P(X)-v'}{X-z'}$ has enumerator degree $d+1$, then on this set $\frac{p(x)-v'}{x-z'}$ can agree with a polynomial from to $\RS_k = \RS_k[F,D]$  on at most $k=d+1$ points within $A$, if $P(z')\neq v'$.  
%This yields an overall agreement density of at most
%\[
%\alpha\left(\frac{p(x)-v'}{x-z'}, q(X)\right)\leq 
%\frac{k}{|D|} +\frac{1-\rho}{2}=\frac{1+\rho}{2} ,
%\]
%and hence 
%\[
%\delta\left(\frac{p(x)-v'}{x-z'}, \RS_k\right) \geq \frac{1-\rho}{2},
%\]
%which would be detected by the FRI verifier (except for a probability of at most $\varepsilon$). 

This leads to the following optimized scheme:
\begin{itemize}
\item
\textit{Setup:} 
The maximum degree is $d^+ = k$, where $k$ is the absolute rate of $\RS_k[F,D]$.
\item
\textit{Commit:} 
Given a polynomial $p(X)$ of degree $\deg p(X)\leq d^+$, the prover commits its domain evaluation over $D$, i.e.
\[ 
\comm(p(X))=[p(x)|_{x\in D}].
\]

\item
\textit{Evaluation proof:} 
Given an opening claim $(z,v)$ with $z\notin D$, the prover engages with the verifier in a batched FRI argument on 
\[
\frac{p(x)-v}{x-z}
\]
with proximity bound $\theta < \frac{1-\rho}{2}$.
\end{itemize}


\subsubsection{Multi-point queries}
\label{s:MultiPoint}

Instead of batching several point evaluation quotients, queries for the values of a polynomial $p(X)$ over a small set 
$\Omega=\{z_1,...,z_m\}\subset F\setminus D$ can be also proven via the multi-evaluation identity 
\begin{equation}
\label{e:MultiEvalIdentity}
\sum_{i=1}^m (p(X) - v_i) \cdot L(z_i,X) = 0 \mod v_\Omega(X),
\end{equation}
where $v_\Omega(X)= \prod_{j=1}^m (X-z_j)$ is the vanishing polynomial of  $\Omega$ and $L(z_i,X)=\prod_{j\neq i} \frac{X-z_j}{z_i-z_j}$ is the Lagrange polynomial at $z_i$. 
Similar to the single query case, one argues using the quotient
\begin{equation}
\label{e:MultiPointQuotient}
h(x) = \Quotient(p, \{(z_i,v_i):i=1,\ldots,m\}) = \frac{p(x)-V(x)}{v_\Omega(x)},
\end{equation}
where 
\[
V(X)= \sum_{i=1}^m v_i\cdot L(z_i,X)
\] 
is the unique degree $\leq m-1$ polynomial that interpolates the claim. 

Alternatively, as in the batch evaluation protocol of Boneh et al.~\cite{HaloInfinite}, one can replace the Lagrange kernel with the non-normalized variant $D(z_i,X)=\prod_{j\neq i} (X-z_j)$, use the identity 
\begin{equation}
\label{e:MultiEvalIdentity2}
\sum_{i=1}^m (p(X)-v_i) \cdot D(z_i,X) =  
0 \mod v_\Omega(X),
\end{equation}
and work with the quotient 
\[
h'(x) =%D(x,x)\cdot \frac{p(x)-V(x)}{v_\Omega(x)}= 
\sum_{i=1}^m \frac{p(x)-v_i}{x-z_i}
\]
instead.
%, where
%\begin{align*}
%D(x,x) &= \sum_{i=1}^m D(z_i,z_i)\cdot L(z_i,x)
%\\  
%     	&= \sum_{i=1}^ m  \prod_{j\neq i} (z_i-z_j)\cdot \prod_{j\neq i} \frac{x-z_j}{z_i-z_j} = \sum_{i=1}^m \prod_{j\neq i}(x-z_j)
%\\ 
%	&=v_\Omega(x)\cdot \sum_{i=1}^m \frac{1}{x-z_i},
%\end{align*}
%which is a polynomial of degree $\leq m-1$.

In both cases one has to limit the number $m$ of simultaneous queries to some maximum value $m_{max}$, satisfying 
\[
k+m_{max} < (1-\theta)\cdot n.
\]
For this it is sufficient to choose $k+m_{max} \leq  (1- \theta_0)\cdot n =\frac{k+n}{2}$, and hence $m_{max}\leq\frac{n-k}{2}$. 
Since even with the lowest blow-up factor we have $n\geq 2\cdot k$, it is thus enough to demand
\begin{equation}
m_{max}\leq \frac{k}{2}.
\end{equation}
In our applications the bound on $m_{max}$ is trivially met, as only few values are queried in the run of the proof. 
Furthermore, given a polynomial we use multi-point queries of fixed given size $m\leq m_{max}$. 
As a consequence the maximum degree in the setup can be enlarged to $d_{max}= k+m-1$.  

\subsection{List commitments}

In the list decoding regime the situation is a bit more subtle. 
Running FRI for $\RS_k[F,D]$ with a proximity parameter $\frac{1-\rho}{2}<\theta<1-\sqrt\rho$  on an evaluation quotient
\[
h(x)=\frac{p(x)-v}{x-z}
\]
only proves agreement of $p$ with an evaluation-claim-consistent polynomial of degree $d^+ = k$ on a set of density greater than $\alpha = 1-\theta$. 
This might not be large enough for proving the polynomials of different runs of FRI being equal. 
In fact, they might differ from claim to claim, unless one runs a joint FRI argument on them. 
Assuming $\alpha >\sqrt{\rho^+}$, where $\rho^+ = \frac{k+1}{|D|}$, the Guruswami-Sudan list decoding bound shows that there might be 
\[
L\leq \frac{1}{2\cdot \eta\cdot \rho^+}
\]
such code words. 
This leads to the idea of list polynomial commitment schemes as in \cite{Redshift} with the following information-theoretic model: 
The prover sets up an oracle which contains a list of $l$, $1\leq l\leq L$, low-degree polynomials, and the oracle is allowed to choose which one to evaluate on a given query. 
Such extended notion is practical as security proofs in the oracle model are similar to polynomial oracle proofs.  
However, the notion of list polynomial oracles as given in \cite{Redshift} is not strong enough to capture correlated agreement, and as a consequence soundness error bounds are too coarse. 
For this reason we do not dive into formal details of that model, and instead directly work with DEEP algebraic linking.


\section{DEEP-ALI}

In this section we discuss the \textit{DEEP algebraic linking (DEEP-ALI)} \cite{DEEPFRI} and demonstrate its application to proving satisfiability of algebraic intermediate representations (AIR). 
Other representations such as randomized AIR or Plonk \cite{Plonk} can be treated similarly. 

\subsection{Algebraic linking and the DEEP method}
 
Algebraic linking transforms satisfiability of algebraic identities over algebraic subsets of $F$ into proximity problems of low-degree extensions to Reed-Solomon codes over ``outside'' domains (i.e. disjoint from the algebraic subset). 
A family of functions $g_1,\ldots, g_N$ on $\Omega =\{x_1,...,x_n\}$  satisfies an algebraic identity 
\[
P(x, g_1(x),\ldots, g_N(x)) = 0  
\] 
on $\Omega$ ($P$ is a polynomial), if and only if their low-degree extensions $p_1(X)$, $\ldots$, $p_N(X)$ satisfy that $P(X, p_1(X), \ldots, p_N(X))$ is divisible by the vanishing polynomial $v_\Omega(X)=\prod_{i=1}^n (X-x_i)$ of $\Omega$ , i.e. the quotient 
\[
h(X)= \frac{P(X, p_1(X),..., p_N(X))}{v_\Omega(X)} 
\]
is a low-degree polynomial. 
This divisibility criterion is translated to the proximity of given code words 
\[
f_1,..., f_N, h\in F^D,
\]
(the honest prover chooses the domain evaluations of $p_1(X),\ldots, p_N(X)$ and $h(X)$ over $D$) to low-degree polynomials, i.e. Reed-Solomon code words\footnotemark.
\footnotetext{%
In the case of a single batched FRI proof for the $f_i$ together with $h$, one needs to use degree correction factors as in Section \ref{s:naivePC}. 
}%
For this the proximity parameter needs to be chosen so that the agreement sets are large enough to infer from local satisfiability of algebraic identities to their satisfiability over the entire field $F$. 
%(Typically, degree correction factors as in Section \ref{} are used in order that the degree of $v_\Omega(X)$ does not affect .)   
This means that the sampling domain $D$ is such that the notion of low-degree is determined by the degree of $P(X,p_1(X),..., p_N(X))$. 
DEEP-ALI instead allows for decoupling the sampling domain size from the degree of $P$. 

DEEP-ALI is very much in alignment with a polynomial IOP for proving that
\begin{equation}
\label{e:PolyIdentity}
P(X,p_1(X),..., p_N(X)) = h(X)\cdot v_\Omega(X). 
\end{equation}
Instead of showing proximity of the quotient
\[
h(x) = \frac{P(x,f_1(x),..., f_N(x))}{v_\Omega(x)}
\]
to a low-degree polynomial, one samples a random point $z\sample F$ outside the domain $D$, and lets the prover provide evaluation claims $v_i$, $i=1,\ldots,N$, for $p_i$, and $v$ for $h$, which are used to check the identity \eqref{e:PolyIdentity} at $X=z$.
The validity of the values is supported by proving proximity of the point evaluation quotients
\[
\frac{f_i(x)-v_i}{x-z}, \quad i=1,\ldots ,N,
\]
as well as
\[
\frac{h(x)-v}{x-z}
\]
to corresponding low-degree polynomials.
%If the low-degree test passes, and if the evaluation claims satisfy the identity \eqref{e:PolyIdentity} at $X=z$, then the verifier accepts.
%This can be done over a smaller domain than in algebraic linking, as the degree
Furthermore, by decomposing $h(X)$ into polynomials of degree $|\Omega|-1$, e.g.
\begin{equation}
\label{e:SegmentPolys}
h(X)=h_0(X)+X^{|\Omega|}\cdot h_1(X)+ \ldots +X^{(d-1)\cdot |\Omega|}\cdot  h_{d-1}(X),
\end{equation}
one can even use a sampling domain the size of which is not determined by the degree of $h(X)$. 
(We use a different decomposition than in \cite{DEEPFRI, ethSTARK}, which does not imply any further constraints on the sampling space for $z$.)

In the unique decoding regime, the DEEP-ALI approach is equivalent to a (univariate) polynomial IOP using FRI as a polynomial commitment scheme as described in Section~\ref{ch:Polycommit}. 
%Its soundness analysis is as for any polynomial IOP.  
For larger proximity parameters, one can generalize the polynomial oracle model to list polynomial commitment schemes as done in \cite{Redshift}, but their approach does not yield soundness bounds which are as tight as given by the correlated agreement theorem. 
In order not to introduce yet another oracle model which reflects this specific correlated agreement property of batched FRI, we directly show how to apply the DEEP method to proving satisfiability of an algebraic intermediate representation.


\subsection{DEEP-ALI of an AIR}
% \cite{Starks} has a more complex definition of AIR, imposing a fixed logical expression in the satisfiability booleans of the polynomial constraints.
% \cite{DEEPFRI} already uses only the conjuction of all polynomial constraints.

%We demonstrate the application of DEEP-ALI to an \textit{algebraic intermediate representation (AIR)} as in 
An \textit{algebraic intermediate representation (AIR)}, see \cite{Starks, DEEPFRI, ethSTARK}, is defined over an FFT domain $H\subset F$ with generator $g$. 
Each $x$ in $H$ carries a ``row'' of $w$ witnesses (or, ``columns'')
\[
(g_1(x),.., g_w(x)),
\]
on which a certain number of algebraic constraints are imposed.
For simplicity we restrict ourselves to constraints between neighboring rows only, i.e. polynomials 
\[
P_1, \ldots ,P_{C}\in F[X_1,...,X_w,Y_1,...,Y_w],  
\] 
each $P_i$ being imposed on a specified coset $a_i\cdot H_i\subseteq H$, where $H_i$ is a subgroup of $H$.
Hence satisfiability of the AIR is defined by 
\begin{equation}
\label{e:AIRConstraint}
P_i(g_1(x),\ldots ,g_w(x),g_1(g\cdot x), \ldots,g_w(g\cdot x)) = 0 \quad \forall x \in a_i\cdot H_i,
\end{equation}
for every $i=1,\ldots,C$.
In terms of polynomials $p_1(X),\ldots, p_w(X)\in F[X]$ extending the witness functions $g_1, \ldots, g_w$, satisfiability of an AIR constraint $P_i$ on $a_i\cdot H_i$ can be expressed by demanding the quotient
\[
\frac{P_i\big(p_1(X), \ldots, p_w(X), p_1(g\cdot X), \ldots, p_w(g\cdot X)\big) }{ v_{a_i\cdot H_i}(X)},
\]
where $v_{a_i\cdot H_i}(X) = X^{|H_i|} - a_i^{|H_i|}$ is the vanishing polynomial of the coset $a_i\cdot H_i$, to be again a polynomial.
This is the approach of \cite{Starks, DEEPFRI, ethSTARK}.
However, instead of working with these quotients we prefer using polynomial identities similar to Plonk \cite{Plonk}:
Satisfiability of an AIR constraint $P_i$ imposed on $a_i\cdot H_i$ is equivalent to 
\begin{equation}
\label{e:AIRConstraintIdentity}
s_i(X) \cdot P_i\big(p_1(X), \ldots, p_w(X), p_1(g\cdot X), \ldots, p_w(g\cdot X)\big) 
= 0 \bmod v_H(X),
\end{equation}
where  
\begin{equation}
\label{e:SelectorPoly}
s_i(X) =\frac{v_H(X)}{v_{a_i\cdot H_i}(X)}  \in F[X]
\end{equation}
is the \textit{selector polynomial}\footnotemark for the constraint $P_i$.
\footnotetext{%
Notice that, although $\deg s_i(X) \leq |H| -  1$, the polynomial $s_i(X)$ can be succinctly evaluated outside $H$ using the rational representation from \eqref{e:SelectorPoly}. 
Therefore no evaluation has to be provided by the prover.
}%
The \textit{overall degree} of the AIR is defined as
\begin{equation}
d = \max_i \deg(P_i),
\end{equation}
where $\deg(P_i)$ is the total degree of $P_i$.

The sampling domain $D$ for FRI is chosen so that $|D|=\beta\cdot |H|$, with a blow-up factor $\beta =1/\rho$ being a power of two, and $\RS_k[F,D]$ is the Reed-Solomon code of length $n= |D|$ and rate $\rho=\frac{k}{n}$, with
\begin{equation}
k = |H|.
\end{equation}
However, the agreement parameter used for FRI is taken slightly larger than $\alpha = \left(1+\frac{1}{2m}\right)\cdot\sqrt\rho$, $m\geq 3$, namely 
\begin{equation}
\alpha^+= \left(1+\frac{1}{2m}\right)\cdot \sqrt{\rho^+}, \quad m\geq 3, 
\end{equation}
where 
\begin{equation}
\rho^+=\frac{|H|+2}{|D|}.
\end{equation}
The reason for this slightly larger choice is due to the evaluation quotients of the protocol, which are subject to the FRI proof. 
Their denominators are at most quadratic and hence the degree of the non-quotients is bounded by $|H|-1 + 2$.
The low-degree extensions $p_i(X)\in F[X]$ of the witness functions $g_i$ on $H$ are provided as code words over $D$, and to use again the same code for a polynomial $h(X)$ of larger degree, we split it into segment polynomials as in \eqref{e:SegmentPolys}. 
%\begin{equation}
%\label{e:SegmentPolys}
%h(X)=h_0(X)+X^{|H|}\cdot h_1(X)+ \ldots +X^{(d-1)\cdot |H|}\cdot  h_{d-1}(X),
%\end{equation}


The DEEP-ALI protocol (for simplicity without zero-knowledge) for our AIR is as follows: 

\begin{protocol}[IOP for AIR using DEEP-ALI]
\label{p:DEEPAIR}
Let $p_1(X), \ldots, p_w(X)\in F[X]$ be polynomials of degree $\deg p_i(X)\leq |H|-1$ satisfying the AIR constraints \eqref{e:AIRConstraint}, $i=1,\ldots, C$.
\begin{enumerate}
\item
The prover sends the domain evaluation oracles  $[p_1], \ldots, [p_w]$ for $p_1(X)$, \ldots, $p_w(X)$ to the verifier, who responds with a randomness $\lambda\sample F$.
%
\item 
The prover computes $h_\lambda(X)\in F[X]$ of degree $\leq d \cdot (|H|-1)$ satisfying the identity
\begin{equation*}
\sum_{i=1}^{C} \lambda^{i-1}\cdot s_i(X)\cdot P_i(p_1(X),\ldots ,p_w(X),p_1(gX), \ldots, p_w(gX)) 
= h_\lambda(X)\cdot v_H(X),
\end{equation*}
splits it into its segment polynomials $h_{\lambda, j}(X)$, $j=0,\ldots, d-1$, each of degree $\leq |H|-1$, as in \eqref{e:SegmentPolys}, and sends their domain evaluation oracles $[h_{\lambda,0}]$, \ldots, $[h_{\lambda,d - 1}]$ to the verifier.  
The overall identity to be proven is therefore
\begin{equation}
\label{e:OverallIdentityAIR}
\begin{aligned}
\sum_{i=1}^{C} \lambda^{i-1}\cdot s_i(X)\cdot P_i(p_1(X), \ldots, p_w(X), p_1(gX)&, \ldots, p_w(gX))
\\ 
&= v_H(X)\cdot \sum_{j=0}^{d-1} X^{j\cdot |H|}\cdot  h_{\lambda,j}(X).
\end{aligned}
\end{equation}
The verifier answers with a DEEP query, i.e. a random $z\sample F\setminus (D\cup H)$.
%
\item 
Upon receiving the DEEP query $z$, the prover sends the evaluation claims $(z,v_{i,1})$, $(g\cdot z, v_{i,2})$, $i=1,...,w$, for the witness polynomials $p_i(X)$, and $(z,v_j)$, $j=0,...,d-1$, for the segment polynomials $h_{\lambda, j}(X)$, to the verifier.
\item
Eventually, prover and verifier run batched FRI for proximity of the evaluation quotients
\begin{equation*}
%\Quotient(p_i,(z,v_{i,1}),(gz,v_{i,2})) &= 
\frac{p_ i(x)- V_i(x)}{(x-z)\cdot (x-gz)},
\end{equation*}
where $V_i(x)$ is determined from the evaluation claims as described in Section \ref{s:MultiPoint},
 $i=1,\ldots,w$, and
\begin{equation*}
%\Quotient(h_{\lambda, j}, (z,v_j)) &= 
\frac{h_{\lambda,j}(x)-v_j}{x-z}, 
\end{equation*}
$j=0,\ldots,d-1$, to $\RS_k[F,D]$, where the chosen agreement parameter is $\alpha^+$ as defined above. 
If FRI passes, and if the evaluation claims satisfy the overall identity \eqref{e:OverallIdentityAIR} at $X=z$, the verifier accepts. 
(Otherwise, it rejects.)
\end{enumerate}
\end{protocol}

\begin{rem}
Notice that the polynomial $s_i(X)$ can only be succinctly evaluated outside $H$.
For this reason $H$ is excluded from the sampling space of $z$.
\end{rem}

\begin{rem}
As discussed above, our definition of AIR is equivalent to the one from \cite{Starks, DEEPFRI, ethSTARK} (besides that we restricted to constraints between neighboring rows in order to keep the presentation simple). 
In particular the quotient polynomial $h_\lambda(X)$ in our protocol is the same as
\begin{multline*}
\sum_{i=1}^{C} \lambda^{i-1} \cdot\frac{ s_i(X)}{v_H(X)}\cdot P_i(p_1(X),\ldots ,p_w(X),p_1(gX), \ldots, p_w(gX)) =
\\
\sum_{i=1}^{C} \lambda^{i-1} \cdot \frac{P_i(p_1(X),\ldots ,p_w(X),p_1(gX), \ldots, p_w(gX))}{v_{a_i\cdot H_i}(X)},
\end{multline*}
which is the batched rational function used in their line of work.
\end{rem}
\begin{rem}
Let us point out the differences of Protocol~\ref{p:DEEPAIR} to the IOP given in \cite{ethSTARK}.
Instead of using a purely linear batching strategy, we use the powers of a single randomness $\lambda$, which is the favoured choice in the context of proof composition.
Secondly, as in \cite{DEEPFRI} we use multi-point quotients for the witness polynomials which are queried at $z$ and $gz$. 
This reduces the number of polynomials on which FRI is applied, at the cost of only a slight increase in the choice of $k^+$.
Thirdly, the way we decompose $h_\lambda(X)$ into segment polynomials  \eqref{e:SegmentPolys} does not further reduce the sampling space for $z$, as is needed when using a FRI-like decomposition.
%\[
%h(X) = h_0(X^d) + X \cdot h_1(X^d) + \ldots + X^{d-1}\cdot h_{d-1}(X^d).
%\] 
\end{rem}

We finally state the soundness error of Protocol \ref{p:DEEPAIR} in the oracle model.
\begin{thm}[DEEP-ALI soundness]
\label{thm:DEEPsoundness} 
The above oracle proof for AIR satisfiability has soundness error 
\begin{equation}
\label{e:SoundnessDEEPALI}
%\varepsilon < L^+ \cdot \frac{d\cdot (k^+ - 1) + C}{|F|} + \varepsilon_{FRI},
\varepsilon \leq L^+ \cdot \left(\frac{C}{|F|} + \frac{d\cdot (k^+ - 1) + (k - 1)}{|F| - |D\cup H|}\right) + \varepsilon_{FRI},
\end{equation}
with $k^+ = k+2$, $L^+ = \frac{m+\frac{1}{2}}{\sqrt{\rho^+}}$, $\rho^+=\frac{k^+}{n}$, and $\varepsilon_{FRI}$ being the soundness error for batched FRI for $\alpha^+$-agreement with $\RS_k[F,D]$, Theorem~\ref{thm:BatchedFRISoundness}.
\end{thm}

\begin{rem}
We point out some differences to the error bound in \cite{ethSTARK}, Theorem 4.
In our bound the list size bound $L^+$ only occurs linearly instead of quadratically. 
This is due to our more careful analysis of the consequences of the correlated agreement enforced on polynomials produced in different rounds of the protocol. 
Secondly, as mentioned above, the alternative decomposition of $h_{\lambda}(X)$ into segment polynomials does not reduce the sampling space for $z$ by a domain that is larger by a factor $d$.  
Less importantly, since we do algebraic batching using the powers of $\lambda$, the first term incorporates the number of constraints $C$. 
A purely linear batching strategy, as used in \cite{ethSTARK}, leads to $\frac{1}{|F|}$ instead.
%We further favor multi-point evaluation quotients instead of singe-point ones, which slightly increases the overall degree 
\end{rem}

\begin{rem}
In the soundness error formula in \cite{DEEPFRI}, Theorem 15, the list bound $L^+$ occurs quadratically. 
This is due to the application of two separate FRI arguments, one for the batched quotients of the witness polynomials, and another one for the overall quotient polynomial.  
(However, the splitting technique for $h$ is outlined in Section 5.5.~therein.)
For the same reason, the notion of list polynomial commitment schemes from \cite{Redshift} would lead to the  $w$-th power of $L^+$, $w$ being the number of witness columns.
This might be acceptable for proving soundness of standard Plonk in the list polynomial oracle model, but not for a larger number of witness columns. 
\end{rem}

\begin{proof}[Proof of Theorem \ref{thm:DEEPsoundness}]
Let us denote $\varepsilon_1=L^+ \cdot \frac{C}{|F|}$, $\varepsilon_2 = L^+ \cdot  \frac{d\cdot (k^+ - 1) + (k - 1)}{|F| - |D\cup H|}$ , and $\varepsilon_3=\varepsilon_{FRI}$. 
Suppose that $P^*$ is an adversary which convinces the verifier with a probability exceeding $\varepsilon =\varepsilon_1+\varepsilon_2+\varepsilon_3$.
Then there exists a first message of $P^*$, i.e. words  $f_1, \ldots, f_w$  on $D$, on which $P^*$ succeeds with probability $> \varepsilon$ , and hence
\[
\Pr\big[\lambda : \Pr\left(P^*\text{ succeeds } |\lambda\right) > \varepsilon_2+\varepsilon_3 \big] > \varepsilon_1.
\]
(Otherwise $\Pr[P^*\text{ succeeds }] \leq 1\cdot \varepsilon_1+ (\varepsilon_2 + \varepsilon_3)\cdot (1-\varepsilon_1) < \varepsilon_1+ \varepsilon_2 + \varepsilon_3$.) 
Likewise, for every such ``good'' $\lambda$ (by the definition of $\varepsilon_1$, there are at least $L^+\cdot C$ many) there exists a second message of $P^*$, i.e. words $h_{\lambda, 0}, \ldots, h_{\lambda, d-1}$ on $D$ such that 
\[
\Pr\big[ z\in F\setminus (D\cup H) :  \Pr(P^*\text{ succeeds }| z) >\varepsilon_3 \big] > \varepsilon_2.
\]
For each such ``good'' $z\in F\setminus (D\cup H)$ (by the definition of $\varepsilon_2$, there are more than $L^+\cdot \left(d\cdot (k^+ - 1) + (k - 1)\right)$ many) the evaluation claims pass the verifier checks, and moreover the soundness of FRI enforces the evaluation quotients
\[ 
\left( 
\frac{f_1(x)-V_1(x)}{(x-z)\cdot (x-g\cdot z)}, \ldots, \frac{f_w(x)-V_w(x)}{(x-z)\cdot (x-g\cdot z)}, \frac{h_{\lambda,0}(x)- v_0}{x-z}, \ldots , \frac{h_{\lambda,d - 1}(x) -v_{d-1}}{x-z}
\right)
\]
to have correlated agreement with some $q_i(X)\in F[X]$, $i=1,...,w +d$, of degree $\deg q_i(X) \leq |H|-1$ on a set $A$ of density at least 
$
\alpha^+ %= \left(1+\frac{1}{2m}\right)\cdot \sqrt{\rho^+} 
> \sqrt{\rho^+}.
$ 
Cancelling out the denominators, we see that 
\[
(f_1,\ldots, f_w, h_{\lambda,0}, \ldots, h_{\lambda,d-1})
\]
have correlated agreement on a set of density $\geq \alpha^+$ with some  element from $F[X]^{w+d}$
%\[
%(p_1, \ldots, p_w, q_{\lambda,0}, \ldots, q_{\lambda,d-1}) \in ,
%\] 
where each component polynomial is of degree $\leq |H| - 1 + 2 = k^+ -1$, and satisfies the evaluation claim.

In what follows we shall call an element $(P_0(X),\ldots, P_{l-1}(X))$ from  $F[X]^{l}$, with component polynomials of degree $\leq k^+ - 1$, having correlated agreement with a vector of functions $(\phi_0(x),\ldots, \phi_{l-1}(x))$ on a set of density $\geq \alpha^+$, an \textit{$\alpha^+$-configuration} for that vector of functions. 
Another way to express this, is that 
\[
P(X) = \sum_{i=0}^{l-1} P_i(X)\cdot Z^{i},
\]
belonging to the Reed-Solomon code $\RS_{k^+}[K,D]$ over the rational function field $K=F(Z)$, is $(1-\alpha^+)$-close to the $K$-valued function $\phi(x) =\sum_{i=0}^{l-1} \phi_i(x)\cdot Z^{i}$.
Note that since $\alpha^+ > \sqrt{\rho^+}$, the Guruswami-Sudan list size bound (over general fields, see Appendix \ref{s:ListDecoding}) is applicable to $\RS_{k^+}[K,D]$. 
In particular,  there are at most 
\[
L^+ = \frac{m+\frac{1}{2}}{\sqrt{\rho^+}}
\]
$\alpha^+$-configurations for $(\phi_0(x),\ldots, \phi_{l-1}(x))$.

Let us keep a combination of ``good'' first and second messages  $(f_1,\ldots,f_w)$, $(h_{\lambda,0},\ldots,h_{\lambda,d-1})$ fixed.
We have seen above that the existence of a single ``good'' $z$ implies the existence of an $\alpha^+$-configuration for $(f_1, \ldots, f_w, h_{\lambda,0}, \ldots$, $h_{\lambda,d-1})$.
%In particular each function in the two messages is $(1-\alpha^+)$-close to a word from $\RS_{k^+}[F,D]$ . 
%(For now, we ignore the evaluation property of these polynomials.)
%
By the Guruswami-Sudan list size bound for $\RS_{k^+}[K,D]$  (see Appendix \ref{s:ListDecoding}) there are at most $ L^+$  such $\alpha^+$-configurations.
%Note that each ``$j$-configuration'' must extend\footnotemark one of the ``$i$-configurations'' (but not necessarily each ``$i$-configuration'' has an extending ``$j$-configuration'').
%\footnotetext{%
%We say that ``$j$-configuration'' extends an ``$i$-configuration'' if the first $w$ polynomials coincide.
%However, the correlated agreement sets may shrink.
%}%
However, since there are more than $L^+\cdot \left(d\cdot (k^+ - 1) + (k - 1)\right)$ many ``good'' $z$, and each establishes an $\alpha^+$-configuration which moreover evaluates to the claimed values, we conclude from the pigeon-hole principle that there is at least one $\alpha^+$-configuration, 
\[
(p_1,\ldots, p_w, q_{\lambda, 0}, \ldots, q_{\lambda,d-1})\in F[X]^{w+d},
\] 
for which the overall identity \eqref{e:OverallIdentityAIR} (taking the $q_{\lambda,j}$ as $h_{\lambda,j}$ therein) holds at more than $d\cdot (k^+-1) + (k - 1)$ many $z$. 
By the degree of the identity, this configuration is a solution of it, hence $(p_1,\ldots, p_w)\in F[X]^w$ is an $\alpha^+$-configuration for $(f_1,\ldots, f_w)$ which satisfies
\begin{equation}
\label{e:OverallIdentityAIRmodH}
\sum_{i=1}^{C} \lambda^{i-1}\cdot s_i(X)\cdot P_i(p_1(X), \ldots, p_w(X), p_1(gX), \ldots, p_w(g X)) 
= 0 \bmod v_H(X).
\end{equation}
%Hence for each good $(f_1,\ldots, f_w)$ and good $\lambda$, there exists a $(p_1,\ldots, p_w)\in F[X]^w$ which solves the  has correlated agreement with $(f_1,\ldots,f_w)$ on a set of density $\geq\alpha^+$, we call it a ``$w$-configuration''.
%for the given $\lambda$.
%

Now let us keep a ``good'' first message $(f_1,\ldots, f_w)$ fixed.
We have seen that for each ``good'' $\lambda$ there exists an \textit{$\alpha^+$-configuration} for $(f_1,\ldots, f_w)$ which  is a solution of \eqref{e:OverallIdentityAIRmodH}.
Again, by the Guruswami-Sudan list size bound for $\RS_{k^+}[K,D]$, there can be at most $L^+$ many $\alpha^+$-configurations for $(f_1,\ldots, f_w)$.
Since there are at least $L^+\cdot C$ many ``good'' $\lambda$, we conclude again from the pigeon-hole principle that there is at least one $\alpha^+$-configuration, which we again denote by $(p_1,\ldots, p_w)$, for which there are at least $C$ many ``good'' $\lambda$ for which \eqref{e:OverallIdentityAIRmodH} holds.
By linear algebra (the Vandermonde matrix is invertible) we conclude that this configuration  satisfies
\[
s_i(X)\cdot P_i(X, p_1(X), \ldots, p_w(X), p_1(gX), \ldots, p_w(g X)) = 0 \bmod v_H(X)
\]
for every $i =1, \ldots, C$.
Hence the values of $(p_1,\ldots, p_w)$ over $H$ satisfy the constraints of the AIR.
This completes the proof.
%
% old version of the proof
%
%First of all, each $f_j(x)$, $j=1,\ldots,w$, agrees on $A$ with $p_j(X) :=V_j(X) +q_j(X)\cdot (X-z)\cdot(X-g\cdot z)$, where $\deg p_j(X)\leq k^+ - 1$. 
%This shows $\alpha^+$-agreement of 
%\[
%f(x)= \sum_{j=1}^w f_j(x)\cdot Z^{j-1}
%\] 
%to a codeword from $RS_{k^+}[K,D]$ over the rational function field $K=F(Z)$, where $\alpha^+ > \sqrt{\rho^+}$. 
%By the Guruswami-Sudan list decoding bound for Reed-Solomon codes (over countable fields , see Appendix \ref{}), there can exist at most $m_1\leq L^+$ , such ``configurations''
%\[ 
%L_1 = \left\{ \left(p_1^{(i)}, \ldots, p_w^{(i)} \right) \in F[X]^w \: : \:  i=1,\ldots,m_1 \right\},
%\]
%each $p_j^{(i)}(X)$ of degree $\leq k^+ - 1$ which agree with $(f_1, \ldots, f_w)$ on a set of density $\geq \alpha^+$,  and moreover evaluate to the claimed values at a ``good'' $z$, $g\cdot z$, 
%(We know from above that $m_1\geq 1$.)
%By the same reasoning there exist  at most $m_2\leq L^+$, ``configurations''
%\[ 
%L_2 = \left\{ \left(p_1^{(i)}, \ldots, p_w^{(i)}, h_{\lambda,0}^{(i)}, \ldots , h_{d-1}^{(i)} \right)\in F[X]^{w+d} \: : \:  i=1,\ldots ,m_2 \right\},
%\]
%again each $p_j^{(i)}(X)$ and $h_{\lambda,j}^{(i)}(X)$ of degree $\leq k^+ - 1$, which evaluate to the claimed values and 
%agree with $(f_1, \ldots ,f_w, h_{\lambda,0},\ldots , h_{\lambda, d-1})$ on a set of density $\geq \alpha^+$.
%(Again, we know from above that $m_2\geq 1$.)
%Note that each of the configuration in $L_2$ must be an extension\footnotemark of one of the configuration in $L_1$ (but not necessarily each configuration from $L_1$ is extended to a configuration from $L_2$).
%\footnotetext{%
%A configuration from $L_2$ extend another from $L_1$ if the first $w$ polynomial coincide. However, the correlated agreement sets may shrink.
%}% 
%Since there are  more than $L^+\cdot d\cdot (k^+ - 1)$ many good $z$, we conclude from the pigeon-hole principle that there is at least one configuration in $L_2$ for which the overall identity holds at more than $d\cdot (k^+-1)$ many $z$. 
%By the degree of the overall identity, this configuration is a solution of the polynomial identity. 
%As this holds for every good $\lambda$, and there are at least $L^+\cdot C$ many, we conclude again by the pigeon-hole principle that there is at least one configuration $(p_1^{(i)}, \ldots,p_w^{(i)})$ from $L_1$ for which there are $C$ many ``good'' $\lambda$ with an extending configuration from $L_2$ which solves the overall polynomial identity. 
%By linear algebra we conclude that this configuration $(p_1^{(i)}, ...,p_w^{(i)})$ solves all $C$ identities of the AIR. 
%This completes the proof.
\end{proof}

We note that in Equation \eqref{e:SoundnessDEEPALI}, the term in the brackets is exactly the soundness error bound of the protocol in the (univariate) polynomial IOP model \cite{DARK}.
As soundness in this model is essentially based on the Schwartz-Zippel lemma, we believe that the blow-up by the factor $L^+$ holds in general for every (public coin) polynomial IOP when replacing polynomial oracles by domain-evaluation oracles. 
(At least for the polynomial IOPs we know, such as \cite{Plonk, Sonic, Marlin} or \cite{Darlin}, this is the case.)
Such a general transformation of (univariate) polynomial IOPs into ordinary (i.e. domain-evaluation) IOPs would be of interest, as the polynomial IOP model is widely used by practitioners.   
The protocol design as well as its security analysis is much easier to understand in the polynomial oracle model, and their soundness error bounds could be easily taken over.
We plan to elaborate on this in a separate document.


\subsubsection{Extractability}

We only provide a brief sketch of how to build the extractor in the oracle model, given a prover $P^*$ which succeeds with a probability that exceeds the soundness error bound from Theorem \ref{thm:DEEPsoundness}:
\begin{enumerate}
\item
Sample a ``good'' first message $[f_1],\ldots , [f_w]$ on which the prover succeeds with a probability greater than the soundness error bound from Theorem \ref{thm:DEEPsoundness}.

\item
In this step we build a straight-line extractor from the ``good'' first message $[f_1],\ldots , [f_w]$ obtained in Step (1):
Read $f_1, \ldots ,f_w$ from the oracles. 
By the proof of Theorem \ref{thm:DEEPsoundness}, $(f_1,\ldots ,f_w)$ agrees with an AIR solution $(p_1(X),\ldots ,p_w(X))\in F[X]^w$ on a set $A$ of density $\geq \alpha^+$. 
To obtain this solution, one repeatedly applies the Guruswami-Sudan list decoder\footnotemark and   ``intersects'' their outputs as described in \cite{ethSTARK}, Section 5.5.
One of the resulting configurations must be the one that satisfies the AIR.
\footnotetext{%
Alternatively one could run the  Guruswami-Sudan list decoder over $K=F(Z)$. 
However, its run-time analysis in the number of operations over $F$ is probably more difficult. 
}%
\end{enumerate}

The first step takes expected time $O\left(\nicefrac{1}{\varepsilon}\right)$, and the Guruswami-Sudan decoder consumes at most $O\left(|D|^{15}\right)$ field operations, see Remark \ref{rem:GuruswamiSudanTime}.
To obtain strict polynomial running time, at the cost of having a success probability $<1$, one may stop the sampling after an appropriate multiple of $\nicefrac{1}{\varepsilon}$.
 
\subsection{Boosting soundness}
In this section we outline standard techniques to lower the DEEP-ALI soundness error for AIRs over small fields $F$. 
(See \cite{ethSTARK}, or \cite{PolygonZero}.)

\subsubsection{Using extension fields}
One simply draws queries (for example the DEEP queries and the FRI challenges) from a suitable large extension field $F_e$ of $F$.
The soundness error bound lowers accordingly, replacing $|F|$ with $|F_e|$. 
(Notice that the disadvantage of applying this approach to the entire protocol is that all FRI quotients have to be computed over $F_e$.)
 
\subsubsection{Increasing the number of protocol challenges}
Instead of drawing protocol challenges from an extension field, one may repeatedly sample a challenge and run the remaining protocol for them in parallel. 
For instance, the first verifier challenge $\lambda$ can be sampled $N_\lambda$ times, $\lambda_1,\ldots, \lambda_{N_\lambda}\sample F$, and the overall polynomial identity \eqref{e:OverallIdentityAIR} is then proven for each of these challenges.
This yields a lowered soundness error bound of the first round,
\[
\varepsilon_1 = \left(L^+\cdot\frac{C}{|F|}\right)^{N_\lambda},
\]
and increases only the number of $h_{\lambda,j}$ polynomials  (by the factor $N_\lambda$) that are subject to the DEEP queries in the second round.
Likewise, one may also take several DEEP queries $z_1,\ldots , z_{N_z}$ from $F\setminus D$, and apply FRI to the batch of all resulting quotients, lowering the soundness error bound of the second round to
\[
\varepsilon_2 = \left(L^+\cdot\frac{d\cdot (k^+ - 1) + (k - 1)}{|F\setminus (D\cup H)|}\right)^{N_z}.
\] 
However, this comes at the cost of increasing the entire batch for FRI by the factor $N_z$ (which might be acceptable in some applications, though).
On the contrary, resampling of FRI challenges would increase the proof size too much. 
Hence, for FRI, extension field sampling is preferable.  

\subsection{Beyond the Johnson bound?}
\label{s:RSConjecture}

The conjectured soundness error for FRI alone (Conjecture \ref{con:FRIsoundness}) is not good enough to argue the security of DEEP-ALI beyond the Guruswami-Sudan list decoding bound. 
For that reason we also cite a general conjecture on the list decodability of Reed-Solomon codes, which is used by Ben-Sasson et al. to conjecture the soundness error of DEEP-FRI up to capacity bound. 

\begin{conj}
(\cite{DEEPFRI}, Conjecture 21) 
Let $\RS_k[F,D]$ be the Reed-Solomon code over a prime field $F=F_q$ with defining domain $D$ and rate $\rho=\frac{k}{|D|}$. 
Then there exists a constant $C_\rho$  such that for every $\theta =1-\rho -\eta$, with $\eta>0$, $\RS_k[F,D]$ is list-decodable from a fraction of $\theta$ errors with list size
\[
L \leq\left(\frac{|D|}{\eta}\right)^{C_\rho}.
\]
\end{conj}
\begin{rem}
No concrete assumptions on the constant $C_\rho$ are made in \cite{DEEPFRI}.
\end{rem}

For quite large fields $F$ (compared to the block length $|D|=n$) there are linear codes which are list decodable up to capacity bound $1-\rho$, such as the \textit{folded Reed-Solomon codes} (see \cite{ListDecodingBook}, e.g.). 
In the case of a bounded alphabet, Guruswami \cite{ListDecodingBook} demonstrates binary linear codes which are list decodable to the Zyablov bound $\frac{1-\rho}{H}$ (here, $H$ is the entropy of the code) and uses such codes to construct examples that approach capacity bound, having list size $L=O\left(\frac{1}{\eta}\right)$. 

However, practitioners seem to avoid this conjecture. 
The ethSTARK documentation \cite{ethSTARK} takes a toy protocol as a representative for the entire DEEP-ALI of AIR,  whereas the plonky2 writeup \cite{PolygonZero} only sketches soundness in the polynomial oracle model, with no reference to list bounds.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\bibliographystyle{alpha}
\bibliography{bibfileSNARKs}

\appendix
\newpage

\section{Appendix}
\label{s:Appendix}

In this section we recap well-known facts on decodability of Reed-Solomon codes\footnotemark, and describe the weighted variant of Theorem \ref{thm:CorrelatedAgreement}, which is used by the soundness analysis of FRI.
\footnotetext{%
The survey by Guruswami \cite{ListDecodingBook} is a recommended source.
}

Unless stated otherwise, we assume that $K$ is a \textit{general} field (finite, or infinite), and as for finite fields we shall call
\[ 
\RS_k[K,D]=\big\{p(x)|_{x\in D} \: :\: p(X)\in K[X], \deg p(X)\leq k-1 \big\}
\]
the Reed-Solomon code with rate $\rho=\frac{k}{|D|}$ and blocklength $n=|D|$. 
We say that a family of codes $\{V(n)\}$ of increasing blocklength $n$ is list decodable up to distance $\theta\in (0,1)$, if the maximum possible number of $\theta$-close codewords,
\[
L = \sup_{f\in K^D} \big| B(f,\theta)\cap V(n) \big|,
\]
is polynomial in the blocklength $n$. 
(Here,  $B(f,\theta)= \{w\in \RS_k[K,D] : \delta(f,w)< \theta\}$
 is the open $\theta$-ball around $f$, and $\delta$ is the fractional Hamming distance.)   
As in the main part of the document, we throughout assume that both $n$ and $k$ are even.

\subsection{Berlekamp-Welch decoder}

Assume that  $f\in K^D$ is at most $\theta_0$--close to $V$, with
$\theta_0 = \frac{1-\rho}{2}$ being the unique decoding radius, 
and let $p(X)$ be the unique polynomial of degree $\leq k-1$ such that $\delta(f,p) \leq \theta_0$. 
Then the number of points of disagreement is at most $e=\frac{n-k}{2}$. 
The Berlekamp-Welch decoder \cite{BerlekampWelch} is based on the observation that if $\Omega =\{x_1,\ldots,x_e\}$ is the set of error positions, and $E(X)=\prod_{x\in\Omega} (X-x)$ is its vanishing polynomial, then we have 
\[
E(x)\cdot f(x) = E(x)\cdot p(x)
\]
for all $x\in D$.

\begin{protocol}[Berlekamp-Welch decoding]
\label{p:BerlekampWelch} 
Let $K$ be a general field, and $V=\RS_k[K,D]$ be the Reed-Solomon code  of length $n= |D|$ and rate $\rho = \frac{k}{n}$.  
Assume any word $f\in K^D$. 
\begin{enumerate}
\item 
Find the coefficients of polynomials $E(X)$, $G(X)$ over $K$ with $\deg E(X)\leq e$, $\deg G(X)\leq k-1+e$, where $e=\frac{n-k}{2}$, such that
\[
	E(x)\cdot f(x) = G(x) \text{ for all } x\in D.
\] 
This linear system has at least one non-trivial solution which can be found in at most $O\left(n^3\right)$ field operations.
\\
{\tiny
This is a homogeneous linear system of $|D|=n$ equations in $k+2\cdot e+1=n+1$ unknowns: 
The $e+1$ coefficients of $E(X)$ and the $k+e$ coefficients of $G(X)$.
}

Notice that for any such non-trivial solution $(E(X),G(X))$ both $E(X)$ and $G(X)$ must be non-trivial. 
\\
{\tiny
If one of the two were identically zero, then, by the size of $D$, the same would be true for the other.
}

\item
\label{i:BerlekampWelch}
For any such non-trivial solution $(E(X),G(X))$ obtained in step 1, check if $G(X)$ is divisible by $E(X)$. 
If yes, then output $p(X)= \frac{G(X)}{E(X)}$.  
(If not, then abort.)
\end{enumerate}
\end{protocol}

For a word $f\in K^D$ with fractional Hamming distance of at most $\theta_0$, Step (\ref{i:BerlekampWelch}) of Protocol \ref{p:BerlekampWelch} always succeeds: 
Let $p(X)$ be the (unique) $\theta_0$-close code word. 
This polynomial agrees with $f$ on a set of size $a\geq\frac{n+k}{2}$. 
Consider the bivariate polynomial
\[
Q(X,Y):= Y\cdot E(X)-G(X). 
\]
Then $Q(X,p(X))$ is a univariate polynomial of degree 
\[
\deg Q(X,p(X)) \leq k-1+e =\frac{n+k}{2} - 1,
\] 
which by the assumption on $p(X)$ has at least $a$ zeroes. 
Consequently $Q(X,p(X))$ is trivial and $p(X)\cdot E(X)=G(X)$ holds as a formal identity. 
Since $E(X)$ is non-trivial, we conclude divisibility. 


\subsection{List decoding}
\label{s:ListDecoding}

\subsubsection{The Sudan decoder}

The Sudan list decoder \cite{Sudan} generalizes the Berlekamp-Welch procedure by searching for general bivariate polynomials $Q(X,Y)\in K[X,Y]$ which satisfy
\[
Q(x,f(x)) = 0 \text{ for all } x\in D.
\]
In order that $Y-p(X)$ is a factor of $Q(X,Y)$ for every polynomial $p(X)$ of degree $\leq d=k-1$ which has the claimed agreement set size with $f$,  one looks for such bivariate $Q$ so that the degree of $Q(X,p(X))$ for any such polynomial is smaller than the targeted agreement set size.

\begin{defn}
The \textit{$(1,d)$--weighted degree} (in short, \textit{$(1,d)$--degree}) of a monomial $X^i\cdot Y^j$ is $i+d\cdot j$. 
More generally, the $(1,d)$--weighted degree of a  bivariate polynomial $Q(X,Y)$ is the maximum of the weighted degrees of its monomials.
\end{defn}

A polynomial $Q(X,Y)$ of $(1,d)$--weighted degree $W$ is of the form
\[
Q(X,Y) = \sum_{i+d\cdot j\leq W, i,j\geq 0} c_{i,j} \cdot X^i\cdot Y^j,
\]
and its number of coefficients is 
\begin{align*}
\sum_{j=0}^{\floor{\nicefrac{W}{d}}} \left(W - d\cdot j+1\right) &= (W+1)\cdot \left(\floor{\frac{W}{d}}+ 1\right) - d\cdot \frac{\floor{\frac{W}{d}}\cdot\left(\floor{\frac{W}{d}}+1\right)}{2}
\\
& \geq \left(\floor{\frac{W}{d}}+ 1\right)\cdot \left(W + 1 - \frac{W}{2}\right)
\\
& \geq \frac{(W+1)\cdot (W+2)}{2\cdot d}
\end{align*}
As a consequence, if this lower bound exceeds the number of linear equations $n=|D|$, the linear system has a non-trivial solution.  
In particular this holds for any 
\[
W \geq \floor{\sqrt{2\cdot d\cdot n}}.
\]

\begin{protocol}[Sudan list decoder] 
Assume that $K$ is a general field, and $\RS_k[K,D]$ is the Reed-Solomon code of length $n=|D|$ and rate $\rho=\frac{k}{n}$. 
Let $f\in K^D$, and choose an agreement parameter $a\in [0,n]$,
$a>\sqrt{2\cdot d \cdot n}$, where $d= k-1$.
\begin{enumerate}
\item 
Solve the linear system on the coefficients of $Q(X,Y)$ with $(1,d)$--degree $W = \floor{\sqrt{2\cdot d\cdot n}}$, given by the interpolation constraints
\[
Q(x,f(x)) = 0, \quad x\in D.
\]
This system has a non-trivial solution which is found in at most $O(n^3)$ field operations.
\\
{\tiny
Note that by construction, for any $\left(1-\frac{a}{n}\right)$--close code word $p(X)$ the irreducible polynomial $Y-p(X)$ divides $Q(X,Y)$.
This already proves that the list size $L \leq \floor{\frac{W}{d}} \leq \frac{\sqrt{2\cdot d\cdot n}}{d} = \sqrt{\frac{2\cdot n}{d}}$.
}

\item 
\label{i:Sudan}
Find all factors of $Q(X,Y)$ which are of the form
\[
Y-p(X),
\] 
with $p(X)$ being a polynomial over $K$ of degree at most $k-1$. 
There are at most $\sqrt{\frac{2\cdot n}{d}}$ such factors. 
Keep only those which agree with $f$ on at least $a$ points. 
\end{enumerate}
\end{protocol}

The efficiency of Step (\ref{i:Sudan}) depends on the field $K$. 
If $K$ is a finite field, then there are polynomial time algorithms (both probabilistic and deterministic) for finding such factors of the form $Y-p(X)$. (They both rely on univariate factorization, see \cite{ListDecodingBook}, e.g.) 
If $K$ is infinite, then this might not be true in general. 


\subsubsection{The Guruswami-Sudan decoder}

To extend the interpolation technique to the Johnson limit $1 - \sqrt{\rho}$, one takes into account that several close codewords might coincide at some points. 
One therefore looks for polynomials $Q(X,Y)$ the $(1,d)$--degree of which is $m$ times as large as the targeted agreement set would suggest, and which have a zero of order $m$ at every interpolating point $(x,f(x))$, $x\in D$. 
The parameter $m\geq 1$ is called \textit{multiplicity parameter}.

\begin{defn}
A polynomial $Q(X,Y)\in K[X,Y]$ is said to have a \textit{zero of order $m$} at the point $(x,y)$, if the shifted polynomial $Q(X+x,Y+y)$ has no monomial of total degree less than $m$.
\end{defn}

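Such polynomials $Q(X,Y)$ of $(1,d)$-weighted degree $W$ still have the property that if $p(X)$ is a polynomial of $\deg p(X) \leq d$, then 
\[
\deg Q(X,p(X))\leq W,
\] 
and moreover $Q(X,p(X))$ has a zero of order at least $m$ at every $x\in D$ with $p(x)=f(x)$.
Hence $Q(X,p(X))$ vanishes identically as soon as $p$ agrees with $f$ on more than $\frac{W}{m}$ points.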
Again, counting the number of coefficients and comparing with the number of interpolation constraints yields that whenever 
\[
\frac{(W+1)\cdot (W+2)}{d \cdot m \cdot (m+1)} > n,
\]
and hence in particular for
$
W \geq \floor{\sqrt{m\cdot (m+1) \cdot d\cdot n}}
$
there always exists such a (non-trivial) polynomial $Q(X,Y)$.
(For details, see \cite{ListDecodingBook}, e.g.)

\begin{protocol}[Guruswami-Sudan list decoder \cite{GuruswamiSudan}] 
\label{p:GuruswamiSudan}
Assume that $K$ is a general field, and $\RS_k[K,D]$ the Reed-Solomon code of length $n= |D|$ and rate $\rho=\frac{k}{n}$.
Let $f\in K^D$, and choose an agreement parameter $a\in [0,n]$,  
$a > \sqrt{\left(1 + \frac{1}{m}\right) \cdot d\cdot n}$,
%$\alpha = \frac{a}{n} >  \sqrt{\left(1+\frac{1}{m}\right)\cdot \frac{k}{n}}$, 
where $m$ is a positive integer (the \textit{multiplicity parameter}).
\begin{enumerate}
\item
Solve the linear system on the coefficients of  $Q(X,Y)$ with $(1,d)$--degree $W = \floor{\sqrt{m\cdot (m+1)\cdot d\cdot n}}$: 
For each $x\in D$,
\[ 
Q(X,Y)\text{ has a zero of order $m$ at } (x, f(x)).
\]
Such a solution always exists and can be found in polynomially many field operations.
\\
{\tiny
By construction, again for any $\left(1-\frac{a}{n}\right)$--close code word $p(X)$ the irreducible polynomial $Y-p(X)$ divides $Q(X,Y)$.
This already proves that the list size $L\leq \frac{\sqrt{m\cdot (m+1)\cdot d\cdot n}}{d} < \sqrt{\frac{m\cdot (m+1)}{\rho}}$.  
}
\item
Find all factors of $Q(X,Y)$ which are of the form
\[
Y - p(X),
\] 
with $p(X)$ being a polynomial over $K$ of degree at most $d=k-1$. 
There are at most $\sqrt{\frac{m\cdot (m+1)}{\rho}}$ many. 
Keep only those which agree with $f$ on at least $a$ points.
\\
{\tiny
As before, this step might be efficient or not, depending on the field $K$.
}
\end{enumerate}
\end{protocol}

\begin{rem}
\label{rem:GuruswamiSudanTime}
Choosing the discriminant method to find factors of the form $Y- p(X)$, the Guruswami-Sudan list decoder takes at most
\[
O\left(\max\left\{\frac{d^3 \cdot n^6 \cdot a^6}{(a^2 - d\cdot n)^6}, \frac{a^6}{k^3}\right\}\right)
\]
field operations over $K$, see \cite{GuruswamiSudan}.
This is at most of order $O\left(|D|^{15}\right)$.
\end{rem}

%Note that in particular for the choice of $\theta =1 -\left(1+\frac{1}{2m}\right)\cdot\sqrt\rho$
Note that choosing 
\[
\alpha = \frac{a}{n}\geq \sqrt{\left(1+\frac{1}{m}\right)\cdot \rho}
\] 
implies a large enough agreement parameter for the Protocol \ref{p:GuruswamiSudan}.
In particular the choice $\alpha =\left(1+\frac{1}{2m}\right)\cdot\sqrt\rho$ used throughout the main part of the document is strong enough, since
\[
\left(1+\frac{1}{2\cdot m}\right)^2 = 1 + \frac{1}{m} +\frac{1}{4\cdot m^2} \geq 1+ \frac{1}{m}.
\]
Let us summarize the consequences of Protocol \ref{p:GuruswamiSudan}.

\begin{thm}[Guruswami-Sudan] 
Let $K$ be a general (possibly infinite) field, and
\[
\RS_k[K,D]= \big\{ p(x)|_{x\in D}\: : \: p(X)\in K[X], \deg p(X) \leq k-1 \big\}
\] 
the Reed-Solomon code of block length $n=|D|$ and rate $\rho=\frac{k}{n}$. 
Choose a proximity parameter $\theta =1- \left(1+\frac{1}{2\cdot m}\right)\cdot \sqrt{\rho}$ for some integer $m\geq 1$. 
Then $\RS_k[K,D]$ is list decodable for $\theta$ with list bound
\begin{equation}
\label{e:GuruswamiSudanListBound}
L \leq \sqrt{\frac{m\cdot (m+1)}{\rho}} \leq \frac{m+\frac{1}{2}}{\sqrt\rho}.
\end{equation}
If $K$ is finite, then the Guruswami-Sudan decoder runs in polynomial time.
\end{thm}


\subsection{Weighted correlated agreement}
\label{s:WeigthedCorrelatedAgreement}

%The soundness analysis of FRI uses a strengthening of the correlated agreement theorem, which allows to additionally keep track of the success probability for the FRI query phase by a sub-probability measure $\mu$. 

We say that a function $f\in F^D$ has \textit{$\mu$-agreement} of at least $\alpha$ with another function $g\in F^D$,
\[ 
\agree_\mu(f, g)>\alpha,
\]
if there is a set $A\subseteq D$ of measure $\mu(A)>\alpha$ on which both functions agree. 
Likewise we say that 
\[
\agree_\mu(f, \RS_k) > \alpha,
\] 
if there exists a $p \in \RS_k[F,D]$ for which $\agree_\mu(f,p)>\alpha$.

\begin{thm}
(Full version of \cite{ProximityGaps}, Theorem 7.1) Let $\theta\in \left(\frac{1-\rho}{2},1-\sqrt\rho\right)$, where $\theta = 1-\sqrt\rho \cdot \left(1+\frac{1}{2m}\right)$, for some integer $m\geq 3$, and assume that $\mu$ is a sub-probability measure on $D$ with common denominator $M$, i.e. for all $x$ in $D$
\[
\mu(\{x\}) = \frac{a_x}{M},
\]
for an integer value $a_x$ . 
Suppose that for $f_0$, $f_1$, \ldots, $f_{N-1}\in F^D$, 
\begin{equation*}
\begin{aligned}
\frac{
	\big|\big\{ \lambda : \agree_\mu(f_0 + \lambda\cdot f_1+ \ldots +\lambda^{N-1}\cdot f_{N-1}, \RS_k)>\alpha \big\}\big|
	}
	{|F|}
\qquad\qquad\qquad
\\
> \max\left(\varepsilon, (N-1)\cdot \frac{M\cdot |D|+1}{|F|} \cdot \frac{2m+1}{\sqrt\rho} \right),
\end{aligned}
\end{equation*}
with $\varepsilon$ as in \eqref{e:epsilonJ}.
Then there exist polynomials $p_0(X)$, $p_1(X)$, \ldots , $p_{N-1}(X)$ from $\RS_k[F,D]$, and a set $A$ of density $\mu(A)>\alpha$ on which $f_i$ coincides with $p_i$ for all $i=0,\ldots, N-1$. 
\end{thm}


\end{document}