$$\hat{y}(\mathbf{x}) := w_{0}+\sum_{i=1}^{n} w_{i} x_{i}+\sum_{i=1}^{n} \sum_{j=i+1}^{n}\left\langle\mathbf{v}_{i}, \mathbf{v}_{j}\right\rangle x_{i} x_{j}$$
$$y(\mathbf{x})=w_{0}+\sum_{i=1}^{n} w_{i} x_{i}+\sum_{i=1}^{n} \sum_{j=i+1}^{n}\left\langle\mathbf{v}_{i, f_{j}}, \mathbf{v}_{j, f_{i}}\right\rangle x_{i} x_{j}$$
$$\hat{y}=\operatorname{sigmoid}\left(y_{FM}+y_{DNN}\right)$$
$$y_{FM}=\langle w, x\rangle+\sum_{i=1}^{d} \sum_{j=i+1}^{d}\left\langle V_{i}, V_{j}\right\rangle x_{i} \cdot x_{j}$$
$$y_{DNN}=W^{|H|+1} \cdot a^{|H|}+b^{|H|+1}$$
$$\hat{y}=\sigma\left(w_{\text{linear}}^{T} a+w_{\text{dnn}}^{T} x_{\text{dnn}}^{k}+w_{\text{cin}}^{T} p^{+}+b\right)$$
$$P(Y=1 \mid \mathbf{x})=\sigma\left(\mathbf{w}_{\text{wide}}^{T}[\mathbf{x}, \phi(\mathbf{x})]+\mathbf{w}_{\text{deep}}^{T} a^{\left(l_{f}\right)}+b\right)$$
$$\phi^{GMF}=\mathbf{p}_{u}^{G} \odot \mathbf{q}_{i}^{G}$$
$$\phi^{MLP}=a_{L}\left(\mathbf{W}_{L}^{T}\left(a_{L-1}\left(\ldots a_{2}\left(\mathbf{W}_{2}^{T}\left[\begin{array}{c}{\mathbf{p}_{u}^{M}} \\ {\mathbf{q}_{i}^{M}}\end{array}\right]+\mathbf{b}_{2}\right) \ldots\right)\right)+\mathbf{b}_{L}\right)$$
$$\hat{y}_{ui}=\sigma\left(\mathbf{h}^{T}\left[\begin{array}{c}{\phi^{GMF}} \\ {\phi^{MLP}}\end{array}\right]\right)$$
$$\begin{aligned} \hat{y}_{AFM}(\mathbf{x}) &=w_{0}+\sum_{i=1}^{n} w_{i} x_{i}+\mathbf{p}^{T} \sum_{i=1}^{n} \sum_{j=i+1}^{n} a_{ij}\left(\mathbf{v}_{i} \odot \mathbf{v}_{j}\right) x_{i} x_{j} \\ a_{ij}^{\prime} &=\mathbf{h}^{T} \operatorname{ReLU}\left(\mathbf{W}\left(\mathbf{v}_{i} \odot \mathbf{v}_{j}\right) x_{i} x_{j}+\mathbf{b}\right) \\ a_{ij} &=\frac{\exp \left(a_{ij}^{\prime}\right)}{\sum_{(i, j) \in \mathcal{R}_{x}} \exp \left(a_{ij}^{\prime}\right)} \end{aligned}$$
$$\hat{y}=\sigma\left(\boldsymbol{W}_{3} \boldsymbol{l}_{2}+b_{3}\right)$$
$$\begin{aligned} \boldsymbol{l}_{2} &=\operatorname{relu}\left(\boldsymbol{W}_{2} \boldsymbol{l}_{1}+\boldsymbol{b}_{2}\right) \\ \boldsymbol{l}_{1} &=\operatorname{relu}\left(\boldsymbol{l}_{z}+\boldsymbol{l}_{p}+\boldsymbol{b}_{1}\right) \end{aligned}$$
$$\begin{aligned} \boldsymbol{l}_{z}&=\left(l_{z}^{1}, l_{z}^{2}, \ldots, l_{z}^{n}, \ldots, l_{z}^{D_{1}}\right), & l_{z}^{n}&=\boldsymbol{W}_{z}^{n} \odot \boldsymbol{z} \\ \boldsymbol{l}_{p}&=\left(l_{p}^{1}, l_{p}^{2}, \ldots, l_{p}^{n}, \ldots, l_{p}^{D_{1}}\right), & l_{p}^{n}&=\boldsymbol{W}_{p}^{n} \odot \boldsymbol{p} \end{aligned}$$
$$\hat{y}_{NFM}(\mathbf{x})=w_{0}+\sum_{i=1}^{n} w_{i} x_{i}+\mathbf{h}^{T} \sigma_{L}\left(\mathbf{W}_{L}\left(\ldots \sigma_{1}\left(\mathbf{W}_{1} f_{BI}\left(\mathcal{V}_{x}\right)+\mathbf{b}_{1}\right) \ldots\right)+\mathbf{b}_{L}\right)$$
$$f_{BI}\left(\mathcal{V}_{x}\right)=\frac{1}{2}\left[\left(\sum_{i=1}^{n} x_{i} \mathbf{v}_{i}\right)^{2}-\sum_{i=1}^{n}\left(x_{i} \mathbf{v}_{i}\right)^{2}\right]$$
$$p(y=1 \mid x)=\sum_{i=1}^{m} \frac{\exp \left(u_{i}^{T} x\right)}{\sum_{j=1}^{m} \exp \left(u_{j}^{T} x\right)} \cdot \frac{1}{1+\exp \left(-w_{i}^{T} x\right)}$$
$$p=\sigma\left(\left[\mathbf{x}_{L_{1}}^{T}, \mathbf{h}_{L_{2}}^{T}\right] \mathbf{w}_{\text{logits}}\right)$$
$$\mathbf{h}_{l+1}=f\left(W_{l} \mathbf{h}_{l}+\mathbf{b}_{l}\right)$$
$$\mathbf{x}_{l+1}=\mathbf{x}_{0} \mathbf{x}_{l}^{T} \mathbf{w}_{l}+\mathbf{b}_{l}+\mathbf{x}_{l}=f\left(\mathbf{x}_{l}, \mathbf{w}_{l}, \mathbf{b}_{l}\right)+\mathbf{x}_{l}$$
$$\mathbf{x}_{0}=\left[\mathbf{x}_{\text{embed},1}^{T}, \ldots, \mathbf{x}_{\text{embed},k}^{T}, \mathbf{x}_{\text{dense}}^{T}\right]$$