Is eieio a full or a lightweight fence?

In this note we demonstrate that considering eieio to be a full fence restricted to write-to-write pairs is not compatible with our hardware experiments. First notice that our model specifies that eieio (1) applies to pairs of stores only and (2) is a lightweight fence. We strenghen our model by considering eieio to be a full fence. We then compare the new FullEIEIO model with the original model.

As illustrated by the following table, the two models differ only by a few behaviours, which are all forbidden by FullEIEIO yet allowed by the original model. Fortunately, some of those behaviours are observed on hardware. Hence, as a conclusion, we consider eieio to be a lightweight fence, as the other option results in a model invalided by hardware.

Behaviours allowed by model and fobidden by FullEieio

There are 19 such tests
 FullEIEIOModelPowerG5Power6Power7
R+eieio+syncForbidAllowNo, 0/7.8GNo, 0/10GNo, 0/61G
   Allow unseenAllow unseenAllow unseen
WRR+2W+sync+eieioForbidAllowNo, 0/3.9GNo, 0/1.0GNo, 0/27G
   Allow unseenAllow unseenAllow unseen
W+RWC+eieio+addr+syncForbidAllowNo, 0/3.9GOk, 28/438MOk, 584/2.0G
   Allow unseen  
W+RWC+eieio+lwsync+syncForbidAllowNo, 0/3.9GOk, 37/448MOk, 545/2.0G
   Allow unseen  
Z6.0+eieio+addr+syncForbidAllowNo, 0/3.9GNo, 0/1.0GNo, 0/27G
   Allow unseenAllow unseenAllow unseen
Z6.0+eieio+lwsync+syncForbidAllowNo, 0/3.9GNo, 0/1.0GNo, 0/27G
   Allow unseenAllow unseenAllow unseen
Z6.3+eieio+eieio+addrForbidAllowNo, 0/3.9GOk, 63/315MOk, 2.4k/2.0G
   Allow unseen  
Z6.3+eieio+eieio+lwsyncForbidAllowNo, 0/3.9GOk, 141/279MOk, 2.5k/2.0G
   Allow unseen  
Z6.3+eieio+eieio+syncForbidAllowNo, 0/3.9GNo, 0/1.0GNo, 0/27G
   Allow unseenAllow unseenAllow unseen
Z6.3+eieio+lwsync+syncForbidAllowNo, 0/3.9GNo, 0/1.0GNo, 0/27G
   Allow unseenAllow unseenAllow unseen
Z6.3+eieio+sync+addrForbidAllowNo, 0/3.9GNo, 0/1.0GNo, 0/27G
   Allow unseenAllow unseenAllow unseen
Z6.3+eieio+sync+lwsyncForbidAllowNo, 0/3.9GNo, 0/1.0GNo, 0/27G
   Allow unseenAllow unseenAllow unseen
Z6.3+eieio+sync+syncForbidAllowNo, 0/3.9GNo, 0/1.0GNo, 0/27G
   Allow unseenAllow unseenAllow unseen
Z6.3+sync+eieio+addrForbidAllowNo, 0/3.9GOk, 54/373MOk, 1.7k/2.0G
   Allow unseen  
Z6.3+sync+eieio+lwsyncForbidAllowNo, 0/3.9GOk, 157/370MOk, 2.8k/2.1G
   Allow unseen  
Z6.4+eieio+sync+syncForbidAllowNo, 0/3.9GNo, 0/1.0GNo, 0/27G
   Allow unseenAllow unseenAllow unseen
Z6.5+eieio+eieio+syncForbidAllowNo, 0/3.9GNo, 0/1.0GNo, 0/27G
   Allow unseenAllow unseenAllow unseen
Z6.5+eieio+lwsync+syncForbidAllowNo, 0/3.9GNo, 0/1.0GNo, 0/27G
   Allow unseenAllow unseenAllow unseen
Z6.5+eieio+sync+syncForbidAllowNo, 0/3.9GNo, 0/1.0GNo, 0/27G
   Allow unseenAllow unseenAllow unseen

FullEIEIO model

We test this FullEIEIO model.

"Model with full eieio"
(* Uniproc *)
acyclic po-loc | rf | fr | co as uniproc

(* Utilities *)
let dd = addr | data
let rdw = po-loc & (fre;rfe)
let detour = po-loc & (coe ; rfe)
let addrpo = addr;po
(*******)
(* ppo *)
(*******)


(* Initial value *)
let ci0 = ctrlisync | detour
let ii0 = dd | rfi | rdw
let cc0 = dd | po-loc | ctrl | addrpo
let ic0 = 0

(* Fixpoint from i -> c in instructions and transitivity *)
let rec ci = ci0 | (ci;ii) | (cc;ci)
and ii = ii0 | ci | (ic;ci) | (ii;ii)
and cc = cc0 | ci | (ci;ic) | (cc;cc)
and ic = ic0 | ii | cc | (ic;cc) | (ii ; ic) (* | ci inclus dans ii et cc *)

let ppo = RW(ic) | RR(ii)


(**********)
(* fences *)
(**********)

(* Power *)
let lwsync = RM(lwsync)|WW(lwsync)
let eieio = WW(eieio)
show sync lwsync eieio

(* ARM *)
let dmb.st=WW(dmb.st)
let dsb.st=WW(dsb.st)
show dmb dsb dmb.st dsb.st

(* Common, all arm barriers are strong *)
let strong = sync|dmb|dsb|dmb.st|dsb.st|eieio
let light = lwsync

let fence = strong|light


(* extensions *)
let ppoext = (rfe;ppo)|(ppo;rfe)|(rfe;ppo;rfe)
let fenceext = (rfe;fence)|(fence;rfe)|(rfe;fence;rfe)

(* happens before *)
let hb = ppo | ppoext | fence | fenceext
let GHB = hb
acyclic hb as thinair

(* prop *)
let hbstar = hb*
let propbase = (fence|fenceext);hbstar

let chapo = rfe|fre|coe|(fre;rfe)|(coe;rfe)

let prop = WW(propbase)| (chapo? ; propbase*; strong; hbstar)

acyclic co|prop as propagation
irreflexive fre;prop;hbstar as causality


This document was translated from LATEX by HEVEA.