LCOV - code coverage report
Current view: top level - src - llvm-late-gc-lowering.cpp (source / functions) Hit Total Coverage
Test: [build process] commit ef510b1f346f4c9f9d86eaceace5ca54961a1dbc Lines: 1270 1562 81.3 %
Date: 2022-07-17 01:01:28 Functions: 68 79 86.1 %
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed Branches: 974 1320 73.8 %

           Branch data     Line data    Source code
       1                 :            : // This file is a part of Julia. License is MIT: https://julialang.org/license
       2                 :            : 
       3                 :            : #include "llvm-version.h"
       4                 :            : #include "passes.h"
       5                 :            : 
       6                 :            : #include <llvm-c/Core.h>
       7                 :            : #include <llvm-c/Types.h>
       8                 :            : 
       9                 :            : #include <llvm/ADT/BitVector.h>
      10                 :            : #include <llvm/ADT/PostOrderIterator.h>
      11                 :            : #include <llvm/ADT/SetVector.h>
      12                 :            : #include <llvm/ADT/SmallVector.h>
      13                 :            : #include "llvm/Analysis/CFG.h"
      14                 :            : #include <llvm/IR/Value.h>
      15                 :            : #include <llvm/IR/Constants.h>
      16                 :            : #include <llvm/IR/Dominators.h>
      17                 :            : #include <llvm/IR/Function.h>
      18                 :            : #include <llvm/IR/Instructions.h>
      19                 :            : #include <llvm/IR/IntrinsicInst.h>
      20                 :            : #include <llvm/IR/LegacyPassManager.h>
      21                 :            : #include <llvm/IR/MDBuilder.h>
      22                 :            : #include <llvm/IR/Module.h>
      23                 :            : #include <llvm/IR/IRBuilder.h>
      24                 :            : #include <llvm/IR/Verifier.h>
      25                 :            : #include <llvm/Pass.h>
      26                 :            : #include <llvm/Support/Debug.h>
      27                 :            : #include <llvm/Transforms/Utils/BasicBlockUtils.h>
      28                 :            : #include <llvm/Transforms/Utils/ModuleUtils.h>
      29                 :            : 
      30                 :            : #include <llvm/InitializePasses.h>
      31                 :            : 
      32                 :            : #include "codegen_shared.h"
      33                 :            : #include "julia.h"
      34                 :            : #include "julia_internal.h"
      35                 :            : #include "julia_assert.h"
      36                 :            : #include "llvm-pass-helpers.h"
      37                 :            : #include <map>
      38                 :            : 
      39                 :            : #define DEBUG_TYPE "late_lower_gcroot"
      40                 :            : 
      41                 :            : using namespace llvm;
      42                 :            : 
      43                 :            : /* Julia GC Root Placement pass. For a general overview of the design of GC
      44                 :            :    root lowering, see the devdocs. This file is the actual implementation.
      45                 :            : 
      46                 :            :    The actual algorithm is fairly straightforward. First recall the goal of this
      47                 :            :    pass:
      48                 :            : 
      49                 :            :    Minimize the number of needed gc roots/stores to them subject to the constraint
      50                 :            :    that at every safepoint, any live gc-tracked pointer (i.e. for which there is
      51                 :            :    a path after this point that contains a use of this pointer) is in some gc slot.
      52                 :            : 
      53                 :            :    In particular, in order to understand this algorithm, it is important to
      54                 :            :    realize that the only places where rootedness matters is at safepoints.
      55                 :            : 
      56                 :            :    Now, the primary phases of the algorithm are:
      57                 :            : 
      58                 :            :    1. Local Scan
      59                 :            : 
      60                 :            :       During this step, each Basic Block is inspected and analyzed for local
      61                 :            :       properties. In particular, we want to determine the ordering of any of
      62                 :            :       the following activities:
      63                 :            : 
      64                 :            :         - Any Def of a gc-tracked pointer. In general Defs are the results of
      65                 :            :           calls or loads from appropriate memory locations. Phi nodes and
      66                 :            :           selects do complicate this story slightly as described below.
      67                 :            :         - Any use of a gc-tracked or derived pointer. As described in the
      68                 :            :           devdocs, a use is in general one of
      69                 :            :               a) a load from a tracked/derived value
      70                 :            :               b) a store to a tracked/derived value
      71                 :            :               c) a store OF a tracked/derived value
      72                 :            :               d) a use of a value as a call operand (including operand bundles)
      73                 :            :         - Any safepoint
      74                 :            : 
      75                 :            :       Crucially, we also perform pointer numbering during the local scan,
      76                 :            :       assigning every Def a unique integer and caching the integer for each
      77                 :            :       derived pointer. This allows us to operate only on the set of Defs (
      78                 :            :       represented by these integers) for the rest of the algorithm. We also
      79                 :            :       maintain some local utility information that is needed by later passes
      80                 :            :       (see the BBState struct for details).
      81                 :            : 
      82                 :            :     2. Dataflow Computation
      83                 :            : 
      84                 :            :       This computation operates entirely over the function's control flow graph
      85                 :            :       and does not look into a basic block. The algorithm is essentially
      86                 :            :       textbook iterative data flow for liveness computation. However, the
      87                 :            :       data flow equations are slightly more complicated because we also
      88                 :            :       forward propagate rootedness information in addition to backpropagating
      89                 :            :       liveness.
      90                 :            : 
      91                 :            :     3. Live Set Computation
      92                 :            : 
      93                 :            :       With the liveness information from the previous step, we can now compute,
      94                 :            :       for every safepoint, the set of values live at that particular safepoint.
      95                 :            :       There are three pieces of information being combined here:
      96                 :            :            i. Values that needed to be live due to local analysis (e.g. there
      97                 :            :               was a def, then a safepoint, then a use). This was computed during
      98                 :            :               local analysis.
      99                 :            :           ii. Values that are live across the basic block (i.e. they are live
     100                 :            :               at every safepoint within the basic block). This relies entirely
     101                 :            :               on the liveness information.
     102                 :            :          iii. Values that are now live-out from the basic block (i.e. they are
     103                 :            :               live at every safepoint following their def). During local
     104                 :            :               analysis, we keep, for every safepoint, those values that would
     105                 :            :               be live if they were live out. Here we can check if they are
     106                 :            :               actually live-out and make the appropriate additions to the live
     107                 :            :               set.
     108                 :            : 
     109                 :            :        Lastly, we also explicitly compute, for each value, the list of values
     110                 :            :        that are simultaneously live at some safepoint. This is known as an
     111                 :            :        "interference graph" and is the input to the next step.
     112                 :            : 
     113                 :            :     4. GC Root coloring
     114                 :            : 
     115                 :            :       Two values which are not simultaneously live at a safepoint can share the
     116                 :            :       same slot. This is an important optimization, because otherwise long
     117                 :            :       functions would have exceptionally large GC slots, reducing performance
     118                 :            :       and bloating the size of the stack. Assigning values to these slots is
     119                 :            :       equivalent to doing graph coloring on the interference graph - the graph
     120                 :            :       where nodes are values and two values have an edge if they are
     121                 :            :       simultaneously live at a safepoint - which we computed in the previous
     122                 :            :       step. Now graph coloring in general is a hard problem. However, for SSA
     123                 :            :       form programs, (and most programs in general, by virtue of their
     124                 :            :       structure), the resulting interference graphs are chordal and can be
     125                 :            :       colored optimally in linear time by performing greedy coloring in a
     126                 :            :       perfect elimination order. Now, our interference graphs are likely not
     127                 :            :       entirely chordal due to some non-SSA corner cases. However, using the same
     128                 :            :       algorithm should still give a very good coloring while having sufficiently
     129                 :            :       low runtime.
     130                 :            : 
     131                 :            :     5. JLCall frame optimizations
     132                 :            : 
     133                 :            :       Unlike earlier iterations of the gc root placement logic, jlcall frames
     134                 :            :       are no longer treated as a special case and need not necessarily be sunk
     135                 :            :       into the gc frame. Additionally, we now emit lifetime
     136                 :            :       intrinsics, so regular stack slot coloring will merge any jlcall frames
     137                 :            :       not sunk into the gc frame. Nevertheless performing such sinking can still
     138                 :            :       be profitable. Since all arguments to a jlcall are guaranteed to be live
     139                 :            :       at that call in some gc slot, we can attempt to rearrange the slots within
     140                 :            :       the gc-frame, or re-use slots not assigned at that particular location
     141                 :            :       for the gcframe. However, even without this optimization, stack frames
     142                 :            :       are at most two times larger than optimal (because regular stack coloring
     143                 :            :       can merge the jlcall allocas).
     144                 :            : 
     145                 :            :       N.B.: This step is not yet implemented.
     146                 :            : 
     147                 :            :     6. Root placement
     148                 :            : 
     149                 :            :       This performs the actual insertion of the GCFrame pushes/pops, zeros out
     150                 :            :       the gc frame and creates the stores to the gc frame according to the
     151                 :            :       stack slot assignment computed in the previous step. GC frames stores
     152                 :            :       are generally sunk right before the first safe point that use them
     153                 :            :       (this is beneficial for code where the primary path does not have
     154                 :            :       safepoints, but some other path - e.g. the error path does). However,
     155                 :            :       if the first safepoint is not dominated by the definition (this can
     156                 :            :       happen due to the non-ssa corner cases), the store is inserted right after
     157                 :            :       the definition.
     158                 :            : 
     159                 :            :     7. Cleanup
     160                 :            : 
     161                 :            :       This step performs necessary cleanup before passing the IR to codegen. In
     162                 :            :       particular, it removes any calls to julia_from_objref intrinsics and
     163                 :            :       removes the extra operand bundles from ccalls. In the future it could
     164                 :            :       also strip the addrspace information from all values as this
     165                 :            :       information is no longer needed.
     166                 :            : 
     167                 :            : 
     168                 :            :   There are a couple important special cases that deserve special attention:
     169                 :            : 
     170                 :            :     A. PHIs and Selects
     171                 :            : 
     172                 :            :       In general PHIs and selects are treated as separate defs for the purposes
     173                 :            :       of the algorithm and their operands as uses of those values. It is
     174                 :            :       important to consider however WHERE the uses of PHI's operands are
     175                 :            :       located. It is neither at the start of the basic block, because the values
     176                 :            :       do not dominate the block (so can't really consider them live-in), nor
     177                 :            :       at the end of the predecessor (because they are actually live out).
     178                 :            :       Instead it is best to think of those uses as living on the edge between
     179                 :            :       the appropriate predecessor and the block containing the PHI.
     180                 :            : 
     181                 :            :       Another concern is PHIs of derived values. Since we cannot simply root
     182                 :            :       these values by storing them to a GC slot, we need to insert a new,
     183                 :            :       artificial PHI that tracks the base pointers for the derived values. E.g.
     184                 :            :       in:
     185                 :            : 
     186                 :            :       A:
     187                 :            :         %Abase = load addrspace(10) *...
     188                 :            :         %Aderived = addrspacecast %Abase to addrspace(11)
     189                 :            :       B:
     190                 :            :         %Bbase = load addrspace(10) *...
     191                 :            :         %Bderived = addrspacecast %Bbase to addrspace(11)
     192                 :            :       C:
     193                 :            :         %phi = phi [%Aderived, %A
     194                 :            :                     %Bderived, %B]
     195                 :            : 
     196                 :            :       we will insert another phi in C to track the relevant base pointers:
     197                 :            : 
     198                 :            :         %philift = phi [%Abase, %A
     199                 :            :                         %Bbase, %B]
     200                 :            : 
     201                 :            :       We then pretend, for the purposes of numbering that %phi was derived from
     202                 :            :       %philift. Note that in order to be able to do this, we need to be able to
     203                 :            :       perform this lifting either during numbering or instruction scanning.
     204                 :            : 
     205                 :            :     B. Vectors of pointers/Union representations
     206                 :            : 
     207                 :            :       Since this pass runs very late in the pass pipeline, it runs after the
     208                 :            :       various vectorization passes. As a result, we have to potentially deal
     209                 :            :       with vectors of gc-tracked pointers. For the purposes of most of the
     210                 :            :       algorithm, we simply assign every element of the vector a separate number
     211                 :            :       and no changes are needed. However, those parts of the algorithm that
     212                 :            :       look at IR need to be aware of the possibility of encountering vectors of
     213                 :            :       pointers.
     214                 :            : 
     215                 :            :       Similarly, unions (e.g. in call returns) are represented as a struct of
     216                 :            :       a gc-tracked value and an argument selector. We simply assign a single
     217                 :            :       number to this struct and proceed as if it was a single pointer. However,
     218                 :            :       this again requires care at the IR level.
     219                 :            : 
     220                 :            :     C. Non mem2reg'd allocas
     221                 :            : 
     222                 :            :       Under some circumstances, allocas will still be present in the IR when
     223                 :            :       we get to this pass. We don't try very hard to handle this case, and
     224                 :            :       simply sink the alloca into the GCFrame.
     225                 :            : */
     226                 :            : 
     227                 :            : struct BBState {
     228                 :            :     // Uses in this BB
     229                 :            :     // These do not get updated after local analysis
     230                 :            :     BitVector Defs;
     231                 :            :     BitVector PhiOuts;
     232                 :            :     BitVector UpExposedUses;
     233                 :            :     // These get updated during dataflow
     234                 :            :     BitVector LiveIn;
     235                 :            :     BitVector LiveOut;
     236                 :            :     std::vector<int> Safepoints;
     237                 :            :     int TopmostSafepoint = -1;
     238                 :            :     bool HasSafepoint = false;
     239                 :            :     // Have we gone through this basic block in our local scan yet?
     240                 :            :     bool Done = false;
     241                 :            : };
     242                 :            : 
     243                 :            : struct State {
     244                 :            :     Function *const F;
     245                 :            :     DominatorTree *DT;
     246                 :            : 
     247                 :            :     // The maximum assigned value number
     248                 :            :     int MaxPtrNumber;
     249                 :            :     // The maximum assigned safepoint number
     250                 :            :     int MaxSafepointNumber;
     251                 :            :     // Cache of numbers assigned to IR values. This includes caching of numbers
     252                 :            :     // for derived values
     253                 :            :     std::map<Value *, int> AllPtrNumbering;
     254                 :            :     std::map<Value *, std::vector<int>> AllCompositeNumbering;
     255                 :            :     // The reverse of the previous maps
     256                 :            :     std::map<int, Value *> ReversePtrNumbering;
     257                 :            :     // Neighbors in the coloring interference graph. I.e. for each value, the
     258                 :            :     // indices of other values that are used simultaneously at some safe point.
     259                 :            :     std::vector<SetVector<int>> Neighbors;
     260                 :            :     // The result of the local analysis
     261                 :            :     std::map<BasicBlock *, BBState> BBStates;
     262                 :            : 
     263                 :            :     // Refinement map. If all of the values are rooted
     264                 :            :     // (-1 means an externally rooted value and -2 means a globally/permanently rooted value),
     265                 :            :     // the key is already rooted (but not the other way around).
     266                 :            :     // A value that can be refined to -2 never need any rooting or write barrier.
     267                 :            :     // A value that can be refined to -1 don't need local root but still need write barrier.
     268                 :            :     // At the end of `LocalScan` this map has a few properties
     269                 :            :     // 1. Values are either < 0 or dominates the key
     270                 :            :     // 2. Therefore this is a DAG
     271                 :            :     std::map<int, SmallVector<int, 1>> Refinements;
     272                 :            : 
     273                 :            :     // GC preserves map. All safepoints dominated by the map key, but not any
     274                 :            :     // of its uses need to preserve the values listed in the map value.
     275                 :            :     std::map<Instruction *, std::vector<int>> GCPreserves;
     276                 :            : 
     277                 :            :     // The assignment of numbers to safepoints. The indices in the map
     278                 :            :     // are indices into the next three maps which store safepoint properties
     279                 :            :     std::map<Instruction *, int> SafepointNumbering;
     280                 :            : 
     281                 :            :     // Reverse mapping index -> safepoint
     282                 :            :     std::vector<Instruction *> ReverseSafepointNumbering;
     283                 :            : 
     284                 :            :     // Instructions that can return twice. For now, all values live at these
     285                 :            :     // instructions will get their own, dedicated GC frame slots, because they
     286                 :            :     // have unobservable control flow, so we can't be sure where they're
     287                 :            :     // actually live. All of these are also considered safepoints.
     288                 :            :     std::vector<Instruction *> ReturnsTwice;
     289                 :            : 
     290                 :            :     // The set of values live at a particular safepoint
     291                 :            :     std::vector<BitVector> LiveSets;
     292                 :            :     // Those values that - if live out from our parent basic block - are live
     293                 :            :     // at this safepoint.
     294                 :            :     std::vector<std::vector<int>> LiveIfLiveOut;
     295                 :            :     // The set of values that are kept alive by the callee.
     296                 :            :     std::vector<std::vector<int>> CalleeRoots;
     297                 :            :     // We don't bother doing liveness on Allocas that were not mem2reg'ed.
     298                 :            :     // they just get directly sunk into the root array.
     299                 :            :     std::vector<AllocaInst *> Allocas;
     300                 :            :     DenseMap<AllocaInst *, unsigned> ArrayAllocas;
     301                 :            :     DenseMap<AllocaInst *, AllocaInst *> ShadowAllocas;
     302                 :            :     std::vector<std::pair<StoreInst *, unsigned>> TrackedStores;
     303                 :     131071 :     State(Function &F) : F(&F), DT(nullptr), MaxPtrNumber(-1), MaxSafepointNumber(-1) {}
     304                 :            : };
     305                 :            : 
     306                 :            : 
     307                 :            : 
     308                 :            : struct LateLowerGCFrameLegacy: public FunctionPass {
     309                 :            :     static char ID;
     310                 :         17 :     LateLowerGCFrameLegacy() : FunctionPass(ID) {}
     311                 :            : 
     312                 :            : protected:
     313                 :         17 :     void getAnalysisUsage(AnalysisUsage &AU) const override {
     314                 :         17 :         FunctionPass::getAnalysisUsage(AU);
     315                 :         17 :         AU.addRequired<DominatorTreeWrapperPass>();
     316                 :         17 :         AU.addPreserved<DominatorTreeWrapperPass>();
     317                 :         17 :         AU.setPreservesCFG();
     318                 :         17 :     }
     319                 :            : 
     320                 :            : private:
     321                 :            :     bool runOnFunction(Function &F) override;
     322                 :            : };
     323                 :            : 
     324                 :            : struct LateLowerGCFrame:  private JuliaPassContext {
     325                 :            :     function_ref<DominatorTree &()> GetDT;
     326                 :     137811 :     LateLowerGCFrame(function_ref<DominatorTree &()> GetDT) : GetDT(GetDT) {}
     327                 :            : 
     328                 :            : public:
     329                 :            :     bool runOnFunction(Function &F, bool *CFGModified = nullptr);
     330                 :            : 
     331                 :            : private:
     332                 :            :     CallInst *pgcstack;
     333                 :            : 
     334                 :            :     void MaybeNoteDef(State &S, BBState &BBS, Value *Def, const std::vector<int> &SafepointsSoFar, SmallVector<int, 1> &&RefinedPtr = SmallVector<int, 1>());
     335                 :            :     void NoteUse(State &S, BBState &BBS, Value *V, BitVector &Uses);
     336                 :    9042910 :     void NoteUse(State &S, BBState &BBS, Value *V) {
     337                 :    9042910 :         NoteUse(S, BBS, V, BBS.UpExposedUses);
     338                 :    9042910 :     }
     339                 :            : 
     340                 :            :     void LiftPhi(State &S, PHINode *Phi);
     341                 :            :     void LiftSelect(State &S, SelectInst *SI);
     342                 :            :     Value *MaybeExtractScalar(State &S, std::pair<Value*,int> ValExpr, Instruction *InsertBefore);
     343                 :            :     std::vector<Value*> MaybeExtractVector(State &S, Value *BaseVec, Instruction *InsertBefore);
     344                 :            :     Value *GetPtrForNumber(State &S, unsigned Num, Instruction *InsertBefore);
     345                 :            : 
     346                 :            :     int Number(State &S, Value *V);
     347                 :            :     int NumberBase(State &S, Value *Base);
     348                 :            :     std::vector<int> NumberAll(State &S, Value *V);
     349                 :            :     std::vector<int> NumberAllBase(State &S, Value *Base);
     350                 :            : 
     351                 :            :     void NoteOperandUses(State &S, BBState &BBS, User &UI);
     352                 :            :     void MaybeTrackDst(State &S, MemTransferInst *MI);
     353                 :            :     void MaybeTrackStore(State &S, StoreInst *I);
     354                 :            :     State LocalScan(Function &F);
     355                 :            :     void ComputeLiveness(State &S);
     356                 :            :     void ComputeLiveSets(State &S);
     357                 :            :     std::vector<int> ColorRoots(const State &S);
     358                 :            :     void PlaceGCFrameStore(State &S, unsigned R, unsigned MinColorRoot, const std::vector<int> &Colors, Value *GCFrame, Instruction *InsertBefore);
     359                 :            :     void PlaceGCFrameStores(State &S, unsigned MinColorRoot, const std::vector<int> &Colors, Value *GCFrame);
     360                 :            :     void PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State &S, std::map<Value *, std::pair<int, int>>);
     361                 :            :     bool CleanupIR(Function &F, State *S, bool *CFGModified);
     362                 :            :     void NoteUseChain(State &S, BBState &BBS, User *TheUser);
     363                 :            :     SmallVector<int, 1> GetPHIRefinements(PHINode *phi, State &S);
     364                 :            :     void FixUpRefinements(ArrayRef<int> PHINumbers, State &S);
     365                 :            :     void RefineLiveSet(BitVector &LS, State &S, const std::vector<int> &CalleeRoots);
     366                 :            :     Value *EmitTagPtr(IRBuilder<> &builder, Type *T, Value *V);
     367                 :            :     Value *EmitLoadTag(IRBuilder<> &builder, Value *V);
     368                 :            : };
     369                 :            : 
     370                 :    2697780 : static unsigned getValueAddrSpace(Value *V) {
     371                 :    2697780 :     return V->getType()->getPointerAddressSpace();
     372                 :            : }
     373                 :            : 
     374                 :    6304520 : static bool isTrackedValue(Value *V) {
     375                 :    6304520 :     PointerType *PT = dyn_cast<PointerType>(V->getType()->getScalarType());
     376   [ +  -  +  + ]:    6304520 :     return PT && PT->getAddressSpace() == AddressSpace::Tracked;
     377                 :            : }
     378                 :            : 
     379                 :   24486800 : static bool isSpecialPtr(Type *Ty) {
     380                 :   24486800 :     PointerType *PTy = dyn_cast<PointerType>(Ty);
     381         [ -  + ]:   24486800 :     if (!PTy)
     382                 :          0 :         return false;
     383                 :   24486800 :     unsigned AS = PTy->getAddressSpace();
     384   [ +  +  +  - ]:   24486800 :     return AddressSpace::FirstSpecial <= AS && AS <= AddressSpace::LastSpecial;
     385                 :            : }
     386                 :            : 
     387                 :            : // return how many Special pointers are in T (count > 0),
     388                 :            : // and if there is anything else in T (all == false)
     389                 :    8554380 : CountTrackedPointers::CountTrackedPointers(Type *T) {
     390         [ +  + ]:    8554380 :     if (isa<PointerType>(T)) {
     391         [ +  + ]:    1850670 :         if (isSpecialPtr(T)) {
     392                 :    1822110 :             count++;
     393         [ +  + ]:    1822110 :             if (T->getPointerAddressSpace() != AddressSpace::Tracked)
     394                 :      13309 :                 derived = true;
     395                 :            :         }
     396   [ +  +  +  +  :    6703710 :     } else if (isa<StructType>(T) || isa<ArrayType>(T) || isa<VectorType>(T)) {
             +  +  +  + ]
     397         [ +  + ]:    4475690 :         for (Type *ElT : T->subtypes()) {
     398                 :    3070930 :             auto sub = CountTrackedPointers(ElT);
     399                 :    3070930 :             count += sub.count;
     400                 :    3070930 :             all &= sub.all;
     401                 :    3070930 :             derived |= sub.derived;
     402                 :            :         }
     403         [ +  + ]:    1404760 :         if (isa<ArrayType>(T))
     404                 :     797832 :             count *= cast<ArrayType>(T)->getNumElements();
     405         [ +  + ]:     606923 :         else if (isa<VectorType>(T)) {
     406                 :        156 :             ElementCount EC = cast<VectorType>(T)->getElementCount();
     407                 :        156 :             count *= EC.getKnownMinValue();
     408                 :            :         }
     409                 :            :     }
     410         [ +  + ]:    8554380 :     if (count == 0)
     411                 :    5704960 :         all = false;
     412                 :    8554380 : }
     413                 :            : 
     414                 :     372823 : unsigned getCompositeNumElements(Type *T) {
     415         [ +  + ]:     372823 :     if (auto *ST = dyn_cast<StructType>(T))
     416                 :     189840 :         return ST->getNumElements();
     417         [ +  - ]:     182983 :     else if (auto *AT = dyn_cast<ArrayType>(T))
     418                 :     182983 :         return AT->getNumElements();
     419                 :            :     else {
     420                 :          0 :         ElementCount EC = cast<VectorType>(T)->getElementCount();
     421                 :          0 :         return EC.getKnownMinValue();
     422                 :            :     }
     423                 :            : }
     424                 :            : 
     425                 :            : // Walk through a Type, and record the element path to every tracked value inside
     426                 :    1523960 : void TrackCompositeType(Type *T, std::vector<unsigned> &Idxs, std::vector<std::vector<unsigned>> &Numberings) {
     427         [ +  + ]:    1523960 :     if (isa<PointerType>(T)) {
     428         [ +  - ]:     778884 :         if (T->getPointerAddressSpace() == AddressSpace::Tracked)
     429                 :     778884 :             Numberings.push_back(Idxs);
     430                 :            :     }
     431   [ +  +  +  +  :     745071 :     else if (isa<StructType>(T) || isa<ArrayType>(T) || isa<VectorType>(T)) {
             -  +  +  + ]
     432                 :     372823 :         unsigned Idx, NumEl = getCompositeNumElements(T);
     433         [ +  + ]:    1703670 :         for (Idx = 0; Idx < NumEl; Idx++) {
     434                 :    1330850 :             Idxs.push_back(Idx);
     435                 :    1330850 :             Type *ElT = GetElementPtrInst::getTypeAtIndex(T, Idx);
     436                 :    1330850 :             TrackCompositeType(ElT, Idxs, Numberings);
     437                 :    1330850 :             Idxs.pop_back();
     438                 :            :         }
     439                 :            :     }
     440                 :    1523960 : }
     441                 :            : 
     442                 :     193106 : std::vector<std::vector<unsigned>> TrackCompositeType(Type *T) {
     443                 :     386212 :     std::vector<unsigned> Idxs;
     444                 :     193106 :     std::vector<std::vector<unsigned>> Numberings;
     445                 :     193106 :     TrackCompositeType(T, Idxs, Numberings);
     446                 :     193106 :     return Numberings;
     447                 :            : }
     448                 :            : 
     449                 :            : 
     450                 :            : 
     451                 :            : // Walk through simple expressions to until we hit something that requires root numbering
     452                 :            : // If the input value is a scalar (pointer), we may return a composite value as base
     453                 :            : // in which case the second member of the pair is the index of the value in the vector.
     454                 :    5534120 : static std::pair<Value*,int> FindBaseValue(const State &S, Value *V, bool UseCache = true) {
     455                 :    5534120 :     Value *CurrentV = V;
     456                 :    5534120 :     int fld_idx = -1;
     457                 :            :     while (true) {
     458         [ +  + ]:    9010680 :         if (UseCache) {
     459         [ +  + ]:    8953910 :             if (CurrentV->getType()->isPointerTy()) {
     460                 :    8765930 :                 auto it = S.AllPtrNumbering.find(CurrentV);
     461         [ +  + ]:    8765930 :                 if (it != S.AllPtrNumbering.end())
     462                 :    3319510 :                     return std::make_pair(CurrentV, fld_idx);
     463                 :            :             } else {
     464                 :     187984 :                 auto it = S.AllCompositeNumbering.find(CurrentV);
     465         [ -  + ]:     187984 :                 if (it != S.AllCompositeNumbering.end())
     466                 :          0 :                     return std::make_pair(CurrentV, fld_idx);
     467                 :            :             }
     468                 :            :         }
     469                 :            :         // Note that this is true:
     470                 :            :         //   assert(fld_idx == -1 ? CurrentV->getType()->isPointerTy() : CurrentV->getType()->isVectorPointerTy());
     471         [ +  + ]:    5691180 :         if (isa<BitCastInst>(CurrentV))
     472                 :    1329360 :             CurrentV = cast<BitCastInst>(CurrentV)->getOperand(0);
     473         [ +  + ]:    4361820 :         else if (isa<AddrSpaceCastInst>(CurrentV)) {
     474                 :    1546650 :             Value *NewV = cast<AddrSpaceCastInst>(CurrentV)->getOperand(0);
     475         [ +  + ]:    1546650 :             if (getValueAddrSpace(NewV) == 0)
     476                 :     499092 :                 break;
     477                 :    1047560 :             CurrentV = NewV;
     478         [ -  + ]:    2815170 :         } else if (auto *Freeze = dyn_cast<FreezeInst>(CurrentV)) {
     479                 :          0 :             CurrentV = Freeze->getOperand(0); // Can be formed by optimizations, treat as a no-op
     480         [ +  + ]:    2815170 :         } else if (auto *GEP = dyn_cast<GetElementPtrInst>(CurrentV)) {
     481                 :     966671 :             CurrentV = GEP->getOperand(0);
     482                 :            :             // GEP can make vectors from a single base pointer
     483   [ -  +  -  -  :     966671 :             if (fld_idx != -1 && !isa<VectorType>(CurrentV->getType())) {
                   -  + ]
     484                 :          0 :                 fld_idx = -1;
     485                 :            :             }
     486                 :            :         }
     487         [ -  + ]:    1848500 :         else if (auto EEI = dyn_cast<ExtractElementInst>(CurrentV)) {
     488   [ #  #  #  # ]:          0 :             assert(CurrentV->getType()->isPointerTy() && fld_idx == -1);
     489                 :            :             // TODO: For now, only support constant index.
     490                 :          0 :             auto IdxOp = cast<ConstantInt>(EEI->getIndexOperand());
     491                 :          0 :             fld_idx = IdxOp->getLimitedValue(INT_MAX);
     492                 :          0 :             CurrentV = EEI->getVectorOperand();
     493                 :            :         }
     494         [ +  + ]:    1848500 :         else if (auto LI = dyn_cast<LoadInst>(CurrentV)) {
     495         [ +  + ]:     907331 :             if (auto PtrT = dyn_cast<PointerType>(LI->getType()->getScalarType())) {
     496         [ +  + ]:     877553 :                 if (PtrT->getAddressSpace() == AddressSpace::Loaded) {
     497                 :     132977 :                     CurrentV = LI->getPointerOperand();
     498                 :     132977 :                     fld_idx = -1;
     499         [ -  + ]:     132977 :                     if (!isSpecialPtr(CurrentV->getType())) {
     500                 :            :                         // This could really be anything, but it's not loaded
     501                 :            :                         // from a tracked pointer, so it doesn't matter what
     502                 :            :                         // it is--just pick something simple.
     503                 :          0 :                         CurrentV = ConstantPointerNull::get(Type::getInt8PtrTy(V->getContext()));
     504                 :            :                     }
     505                 :     132977 :                     continue;
     506                 :            :                 }
     507                 :            :             }
     508                 :            :             // In general a load terminates a walk
     509                 :     774354 :             break;
     510                 :            :         }
     511         [ +  + ]:     941172 :         else if (auto LI = dyn_cast<AtomicCmpXchgInst>(CurrentV)) {
     512                 :            :             // In general a load terminates a walk
     513                 :            :             (void)LI;
     514                 :         12 :             break;
     515                 :            :         }
     516         [ -  + ]:     941160 :         else if (auto LI = dyn_cast<AtomicRMWInst>(CurrentV)) {
     517                 :            :             // In general a load terminates a walk
     518                 :            :             (void)LI;
     519                 :          0 :             break;
     520                 :            :         }
     521         [ -  + ]:     941160 :         else if (auto II = dyn_cast<IntrinsicInst>(CurrentV)) {
     522                 :            :             // Some intrinsics behave like LoadInst followed by a SelectInst
     523                 :            :             // This should never happen in a derived addrspace (since those cannot be stored to memory)
     524                 :            :             // so we don't need to lift these operations, but we do need to check if it's loaded and continue walking the base pointer
     525   [ #  #  #  #  :          0 :             if (II->getIntrinsicID() == Intrinsic::masked_load ||
                   #  # ]
     526                 :          0 :                 II->getIntrinsicID() == Intrinsic::masked_gather) {
     527         [ #  # ]:          0 :                 if (auto VTy = dyn_cast<VectorType>(II->getType())) {
     528         [ #  # ]:          0 :                     if (auto PtrT = dyn_cast<PointerType>(VTy->getElementType())) {
     529         [ #  # ]:          0 :                         if (PtrT->getAddressSpace() == AddressSpace::Loaded) {
     530                 :          0 :                             Value *Mask = II->getOperand(2);
     531                 :          0 :                             Value *Passthrough = II->getOperand(3);
     532   [ #  #  #  #  :          0 :                             if (!isa<Constant>(Mask) || !cast<Constant>(Mask)->isAllOnesValue()) {
                   #  # ]
     533         [ #  # ]:          0 :                                 assert(isa<UndefValue>(Passthrough) && "unimplemented");
     534                 :            :                                 (void)Passthrough;
     535                 :            :                             }
     536                 :          0 :                             CurrentV = II->getOperand(0);
     537         [ #  # ]:          0 :                             if (II->getIntrinsicID() == Intrinsic::masked_load) {
     538                 :          0 :                                 fld_idx = -1;
     539         [ #  # ]:          0 :                                 if (!isSpecialPtr(CurrentV->getType())) {
     540                 :          0 :                                     CurrentV = ConstantPointerNull::get(Type::getInt8PtrTy(V->getContext()));
     541                 :            :                                 }
     542                 :            :                             } else {
     543         [ #  # ]:          0 :                                 if (auto VTy2 = dyn_cast<VectorType>(CurrentV->getType())) {
     544         [ #  # ]:          0 :                                     if (!isSpecialPtr(VTy2->getElementType())) {
     545                 :          0 :                                         CurrentV = ConstantPointerNull::get(Type::getInt8PtrTy(V->getContext()));
     546                 :          0 :                                         fld_idx = -1;
     547                 :            :                                     }
     548                 :            :                                 }
     549                 :            :                             }
     550                 :          0 :                             continue;
     551                 :            :                         }
     552                 :            :                     }
     553                 :            :                 }
     554                 :            :                 // In general a load terminates a walk
     555                 :          0 :                 break;
     556                 :            :             }
     557                 :            :         }
     558                 :            :         else {
     559                 :     941160 :             break;
     560                 :            :         }
     561                 :    3476560 :     }
     562   [ +  +  +  +  :    2214620 :     assert(isa<LoadInst>(CurrentV) || isa<CallInst>(CurrentV) ||
          +  +  +  -  +  
          +  +  +  +  +  
          +  +  +  +  +  
          -  +  +  -  +  
             -  -  -  - ]
     563                 :            :            isa<AtomicCmpXchgInst>(CurrentV) || isa<AtomicRMWInst>(CurrentV) ||
     564                 :            :            isa<Argument>(CurrentV) || isa<SelectInst>(CurrentV) ||
     565                 :            :            isa<PHINode>(CurrentV) || isa<AddrSpaceCastInst>(CurrentV) ||
     566                 :            :            isa<Constant>(CurrentV) || isa<AllocaInst>(CurrentV) ||
     567                 :            :            isa<InsertValueInst>(CurrentV) ||
     568                 :            :            isa<ExtractValueInst>(CurrentV) ||
     569                 :            :            isa<InsertElementInst>(CurrentV) ||
     570                 :            :            isa<ShuffleVectorInst>(CurrentV));
     571                 :    2214620 :     return std::make_pair(CurrentV, fld_idx);
     572                 :            : }
     573                 :            : 
     574                 :     495655 : Value *LateLowerGCFrame::MaybeExtractScalar(State &S, std::pair<Value*,int> ValExpr, Instruction *InsertBefore) {
     575                 :     495655 :     Value *V = ValExpr.first;
     576         [ +  + ]:     495655 :     if (isa<PointerType>(V->getType())) {
     577         [ -  + ]:     457575 :         assert(ValExpr.second == -1);
     578         [ +  + ]:     457575 :         if (!isTrackedValue(V)) {
     579                 :      13309 :             int BaseNumber = NumberBase(S, V);
     580         [ -  + ]:      13309 :             if (BaseNumber >= 0)
     581                 :          0 :                 V = GetPtrForNumber(S, BaseNumber, InsertBefore);
     582                 :            :             else
     583                 :      13309 :                 V = ConstantPointerNull::get(cast<PointerType>(T_prjlvalue));
     584                 :            :         }
     585                 :            :     }
     586         [ +  - ]:      38080 :     else if (ValExpr.second != -1) {
     587                 :      38080 :         auto Tracked = TrackCompositeType(V->getType());
     588                 :      38080 :         auto Idxs = makeArrayRef(Tracked.at(ValExpr.second));
     589                 :      38080 :         auto IdxsNotVec = Idxs.slice(0, Idxs.size() - 1);
     590                 :      38080 :         Type *FinalT = ExtractValueInst::getIndexedType(V->getType(), IdxsNotVec);
     591                 :      38080 :         bool IsVector = isa<VectorType>(FinalT);
     592                 :      38080 :         PointerType *T = cast<PointerType>(
     593                 :      38080 :             GetElementPtrInst::getTypeAtIndex(FinalT, Idxs.back()));
     594         [ -  + ]:      38080 :         if (T->getAddressSpace() != AddressSpace::Tracked) {
     595                 :            :             // if V isn't tracked, get the shadow def
     596                 :          0 :             auto Numbers = NumberAllBase(S, V);
     597                 :          0 :             int BaseNumber = Numbers.at(ValExpr.second);
     598         [ #  # ]:          0 :             if (BaseNumber >= 0)
     599                 :          0 :                 V = GetPtrForNumber(S, BaseNumber, InsertBefore);
     600                 :            :             else
     601                 :          0 :                 V = ConstantPointerNull::get(cast<PointerType>(T_prjlvalue));
     602                 :          0 :             return V;
     603                 :            :         }
     604         [ +  - ]:      38080 :         if (Idxs.size() > IsVector)
     605         [ -  + ]:      38080 :             V = ExtractValueInst::Create(V, IsVector ? IdxsNotVec : Idxs, "", InsertBefore);
     606         [ -  + ]:      38080 :         if (IsVector)
     607                 :          0 :             V = ExtractElementInst::Create(V,
     608                 :          0 :                     ConstantInt::get(Type::getInt32Ty(V->getContext()), Idxs.back()),
     609                 :            :                     "", InsertBefore);
     610                 :            :     }
     611                 :     495655 :     return V;
     612                 :            : }
     613                 :            : 
     614                 :          0 : std::vector<Value*> LateLowerGCFrame::MaybeExtractVector(State &S, Value *BaseVec, Instruction *InsertBefore) {
     615                 :          0 :     auto Numbers = NumberAllBase(S, BaseVec);
     616                 :          0 :     std::vector<Value*> V{Numbers.size()};
     617                 :          0 :     Value *V_rnull = ConstantPointerNull::get(cast<PointerType>(T_prjlvalue));
     618         [ #  # ]:          0 :     for (unsigned i = 0; i < V.size(); ++i) {
     619         [ #  # ]:          0 :         if (Numbers[i] >= 0) // ignores undef and poison values
     620                 :          0 :             V[i] = GetPtrForNumber(S, Numbers[i], InsertBefore);
     621                 :            :         else
     622                 :          0 :             V[i] = V_rnull;
     623                 :            :     }
     624                 :          0 :     return V;
     625                 :            : }
     626                 :            : 
     627                 :     454460 : Value *LateLowerGCFrame::GetPtrForNumber(State &S, unsigned Num, Instruction *InsertBefore)
     628                 :            : {
     629                 :     454460 :     Value *Val = S.ReversePtrNumbering[Num];
     630                 :     454460 :     unsigned Idx = -1;
     631         [ +  + ]:     454460 :     if (!isa<PointerType>(Val->getType())) {
     632                 :      23503 :         const std::vector<int> &AllNums = S.AllCompositeNumbering[Val];
     633         [ +  - ]:      60306 :         for (Idx = 0; Idx < AllNums.size(); ++Idx) {
     634         [ +  + ]:      60306 :             if ((unsigned)AllNums[Idx] == Num)
     635                 :      23503 :                 break;
     636                 :            :         }
     637         [ -  + ]:      23503 :         assert(Idx < AllNums.size());
     638                 :            :     }
     639                 :     454460 :     return MaybeExtractScalar(S, std::make_pair(Val, Idx), InsertBefore);
     640                 :            : }
     641                 :            : 
     642                 :      13611 : void LateLowerGCFrame::LiftSelect(State &S, SelectInst *SI) {
     643         [ +  - ]:      13611 :     if (isa<PointerType>(SI->getType()) ?
     644                 :      13611 :             S.AllPtrNumbering.count(SI) :
     645         [ +  + ]:      13611 :             S.AllCompositeNumbering.count(SI)) {
     646                 :            :         // already visited here--nothing to do
     647                 :        302 :         return;
     648                 :            :     }
     649                 :      13309 :     std::vector<int> Numbers;
     650                 :      13309 :     unsigned NumRoots = 1;
     651         [ -  + ]:      13309 :     if (auto VTy = dyn_cast<VectorType>(SI->getType())) {
     652                 :          0 :         ElementCount EC = VTy->getElementCount();
     653                 :          0 :         Numbers.resize(EC.getKnownMinValue(), -1);
     654                 :            :     }
     655                 :            :     else
     656         [ +  - ]:      13309 :         assert(isa<PointerType>(SI->getType()) && "unimplemented");
     657         [ -  + ]:      13309 :     assert(!isTrackedValue(SI));
     658                 :            :     // find the base root for the arguments
     659                 :      13309 :     Value *TrueBase = MaybeExtractScalar(S, FindBaseValue(S, SI->getTrueValue(), false), SI);
     660                 :      13309 :     Value *FalseBase = MaybeExtractScalar(S, FindBaseValue(S, SI->getFalseValue(), false), SI);
     661                 :      13309 :     std::vector<Value*> TrueBases;
     662                 :      13309 :     std::vector<Value*> FalseBases;
     663         [ -  + ]:      13309 :     if (!isa<PointerType>(TrueBase->getType())) {
     664                 :          0 :         TrueBases = MaybeExtractVector(S, TrueBase, SI);
     665         [ #  # ]:          0 :         assert(TrueBases.size() == Numbers.size());
     666                 :          0 :         NumRoots = TrueBases.size();
     667                 :            :     }
     668         [ -  + ]:      13309 :     if (!isa<PointerType>(FalseBase->getType())) {
     669                 :          0 :         FalseBases = MaybeExtractVector(S, FalseBase, SI);
     670         [ #  # ]:          0 :         assert(FalseBases.size() == Numbers.size());
     671                 :          0 :         NumRoots = FalseBases.size();
     672                 :            :     }
     673         [ +  - ]:      13309 :     if (isa<PointerType>(SI->getType()) ?
     674                 :      13309 :             S.AllPtrNumbering.count(SI) :
     675         [ -  + ]:      13309 :             S.AllCompositeNumbering.count(SI)) {
     676                 :            :         // MaybeExtractScalar or MaybeExtractVector handled this for us (recursively, though a PHINode)
     677                 :          0 :         return;
     678                 :            :     }
     679                 :            :     // need to handle each element (may just be one scalar)
     680         [ +  + ]:      26618 :     for (unsigned i = 0; i < NumRoots; ++i) {
     681                 :            :         Value *TrueElem;
     682         [ +  - ]:      13309 :         if (isa<PointerType>(TrueBase->getType()))
     683                 :      13309 :             TrueElem = TrueBase;
     684                 :            :         else
     685                 :          0 :             TrueElem = TrueBases[i];
     686                 :            :         Value *FalseElem;
     687         [ +  - ]:      13309 :         if (isa<PointerType>(FalseBase->getType()))
     688                 :      13309 :             FalseElem = FalseBase;
     689                 :            :         else
     690                 :          0 :             FalseElem = FalseBases[i];
     691                 :      13309 :         Value *Cond = SI->getCondition();
     692         [ -  + ]:      13309 :         if (isa<VectorType>(Cond->getType())) {
     693                 :          0 :             Cond = ExtractElementInst::Create(Cond,
     694                 :          0 :                     ConstantInt::get(Type::getInt32Ty(Cond->getContext()), i),
     695                 :            :                     "", SI);
     696                 :            :         }
     697         [ -  + ]:      13309 :         if (FalseElem->getType() != TrueElem->getType())
     698                 :          0 :             FalseElem = new BitCastInst(FalseElem, TrueElem->getType(), "", SI);
     699                 :      13309 :         SelectInst *SelectBase = SelectInst::Create(Cond, TrueElem, FalseElem, "gclift", SI);
     700                 :      13309 :         int Number = ++S.MaxPtrNumber;
     701                 :      13309 :         S.AllPtrNumbering[SelectBase] = Number;
     702                 :      13309 :         S.ReversePtrNumbering[Number] = SelectBase;
     703         [ +  - ]:      13309 :         if (isa<PointerType>(SI->getType()))
     704                 :      13309 :             S.AllPtrNumbering[SI] = Number;
     705                 :            :         else
     706                 :          0 :             Numbers[i] = Number;
     707                 :            :     }
     708         [ -  + ]:      13309 :     if (auto VTy = dyn_cast<FixedVectorType>(SI->getType())) {
     709         [ #  # ]:          0 :         if (NumRoots != Numbers.size()) {
     710                 :            :             // broadcast the scalar root number to fill the vector
     711         [ #  # ]:          0 :             assert(NumRoots == 1);
     712                 :          0 :             int Number = Numbers[0];
     713                 :          0 :             Numbers.resize(0);
     714                 :          0 :             ElementCount EC = VTy->getElementCount();
     715                 :          0 :             Numbers.resize(EC.getKnownMinValue(), Number);
     716                 :            :         }
     717                 :            :     }
     718         [ -  + ]:      13309 :     if (!isa<PointerType>(SI->getType()))
     719                 :          0 :         S.AllCompositeNumbering[SI] = Numbers;
     720                 :            : }
     721                 :            : 
     722                 :          0 : void LateLowerGCFrame::LiftPhi(State &S, PHINode *Phi) {
     723         [ #  # ]:          0 :     if (isa<PointerType>(Phi->getType()) ?
     724                 :          0 :             S.AllPtrNumbering.count(Phi) :
     725         [ #  # ]:          0 :             S.AllCompositeNumbering.count(Phi))
     726                 :          0 :         return;
     727                 :            :     // need to handle each element (may just be one scalar)
     728                 :          0 :     SmallVector<PHINode *, 2> lifted;
     729                 :          0 :     std::vector<int> Numbers;
     730                 :          0 :     unsigned NumRoots = 1;
     731         [ #  # ]:          0 :     if (auto VTy = dyn_cast<FixedVectorType>(Phi->getType())) {
     732                 :          0 :         NumRoots = VTy->getNumElements();
     733                 :          0 :         Numbers.resize(NumRoots);
     734                 :            :     }
     735                 :            :     else {
     736                 :            :         // TODO: SVE
     737         [ #  # ]:          0 :         assert(isa<PointerType>(Phi->getType()) && "unimplemented");
     738                 :            :     }
     739         [ #  # ]:          0 :     for (unsigned i = 0; i < NumRoots; ++i) {
     740                 :          0 :         PHINode *lift = PHINode::Create(T_prjlvalue, Phi->getNumIncomingValues(), "gclift", Phi);
     741                 :          0 :         int Number = ++S.MaxPtrNumber;
     742                 :          0 :         S.AllPtrNumbering[lift] = Number;
     743                 :          0 :         S.ReversePtrNumbering[Number] = lift;
     744         [ #  # ]:          0 :         if (!isa<VectorType>(Phi->getType()))
     745                 :          0 :             S.AllPtrNumbering[Phi] = Number;
     746                 :            :         else
     747                 :          0 :             Numbers[i] = Number;
     748                 :          0 :         lifted.push_back(lift);
     749                 :            :     }
     750         [ #  # ]:          0 :     if (!isa<PointerType>(Phi->getType()))
     751                 :          0 :         S.AllCompositeNumbering[Phi] = Numbers;
     752         [ #  # ]:          0 :     for (unsigned i = 0; i < Phi->getNumIncomingValues(); ++i) {
     753                 :          0 :         Value *Incoming = Phi->getIncomingValue(i);
     754                 :          0 :         BasicBlock *IncomingBB = Phi->getIncomingBlock(i);
     755                 :          0 :         Instruction *Terminator = IncomingBB->getTerminator();
     756                 :          0 :         Value *Base = MaybeExtractScalar(S, FindBaseValue(S, Incoming, false), Terminator);
     757                 :          0 :         std::vector<Value*> IncomingBases;
     758         [ #  # ]:          0 :         if (!isa<PointerType>(Base->getType())) {
     759                 :          0 :             IncomingBases = MaybeExtractVector(S, Base, Terminator);
     760         [ #  # ]:          0 :             assert(IncomingBases.size() == NumRoots);
     761                 :            :         }
     762         [ #  # ]:          0 :         for (unsigned i = 0; i < NumRoots; ++i) {
     763                 :          0 :             PHINode *lift = lifted[i];
     764                 :            :             Value *BaseElem;
     765         [ #  # ]:          0 :             if (isa<PointerType>(Base->getType()))
     766                 :          0 :                 BaseElem = Base;
     767                 :            :             else
     768                 :          0 :                 BaseElem = IncomingBases[i];
     769         [ #  # ]:          0 :             if (BaseElem->getType() != T_prjlvalue)
     770                 :          0 :                 BaseElem = new BitCastInst(BaseElem, T_prjlvalue, "", Terminator);
     771                 :          0 :             lift->addIncoming(BaseElem, IncomingBB);
     772                 :            :         }
     773                 :            :     }
     774                 :            : }
     775                 :            : 
     776                 :    5332830 : int LateLowerGCFrame::NumberBase(State &S, Value *CurrentV)
     777                 :            : {
     778                 :    5332830 :     auto it = S.AllPtrNumbering.find(CurrentV);
     779         [ +  + ]:    5332830 :     if (it != S.AllPtrNumbering.end())
     780                 :    3319510 :         return it->second;
     781                 :            :     int Number;
     782         [ +  + ]:    2013320 :     if (isa<Constant>(CurrentV)) {
     783                 :            :         // Perm rooted
     784                 :      24583 :         Number = -2;
     785   [ +  +  +  -  :    4384410 :     } else if (isa<Argument>(CurrentV) || isa<AllocaInst>(CurrentV) ||
             +  +  +  + ]
     786         [ +  + ]:    2395670 :             (isa<AddrSpaceCastInst>(CurrentV) && !isTrackedValue(CurrentV))) {
     787                 :            :         // We know this is rooted in the parent
     788                 :            :         // future note: we could chose to exclude argument of type CalleeRooted here
     789                 :     186468 :         Number = -1;
     790         [ -  + ]:    1802270 :     } else if (!isSpecialPtr(CurrentV->getType())) {
     791                 :            :         // Externally rooted somehow hopefully (otherwise there's a bug in the
     792                 :            :         // input IR)
     793                 :          0 :         Number = -1;
     794   [ +  +  +  +  :    1802270 :     } else if (isa<SelectInst>(CurrentV) && !isTrackedValue(CurrentV)) {
                   +  + ]
     795                 :        302 :         LiftSelect(S, cast<SelectInst>(CurrentV));
     796                 :        302 :         Number = S.AllPtrNumbering.at(CurrentV);
     797                 :        302 :         return Number;
     798   [ +  +  -  +  :    1801970 :     } else if (isa<PHINode>(CurrentV) && !isTrackedValue(CurrentV)) {
                   -  + ]
     799                 :          0 :         LiftPhi(S, cast<PHINode>(CurrentV));
     800                 :          0 :         Number = S.AllPtrNumbering.at(CurrentV);
     801                 :          0 :         return Number;
     802         [ +  + ]:    1801970 :     } else if (isa<ExtractValueInst>(CurrentV)) {
     803                 :      39871 :         auto Numbers = NumberAllBase(S, CurrentV);
     804         [ -  + ]:      39871 :         assert(Numbers.size() == 1);
     805                 :      39871 :         Number = Numbers[0];
     806                 :            :     } else {
     807   [ +  -  +  - ]:    1762100 :         assert((CurrentV->getType()->isPointerTy() && isTrackedValue(CurrentV)));
     808                 :    1762100 :         Number = ++S.MaxPtrNumber;
     809                 :    1762100 :         S.ReversePtrNumbering[Number] = CurrentV;
     810                 :            :     }
     811                 :    2013020 :     S.AllPtrNumbering[CurrentV] = Number;
     812                 :    2013020 :     return Number;
     813                 :            : }
     814                 :            : 
     815                 :    5285310 : int LateLowerGCFrame::Number(State &S, Value *V) {
     816         [ -  + ]:    5285310 :     assert(isSpecialPtr(V->getType()));
     817                 :    5285310 :     auto CurrentV = FindBaseValue(S, V);
     818                 :            :     int Number;
     819         [ +  - ]:    5285310 :     if (CurrentV.second == -1) {
     820                 :    5285310 :         Number = NumberBase(S, CurrentV.first);
     821                 :            :     } else {
     822                 :          0 :         auto Numbers = NumberAllBase(S, CurrentV.first);
     823                 :          0 :         Number = Numbers.at(CurrentV.second);
     824                 :            :     }
     825         [ +  + ]:    5285310 :     if (V != CurrentV.first)
     826                 :    1231580 :         S.AllPtrNumbering[V] = Number;
     827                 :    5285310 :     return Number;
     828                 :            : }
     829                 :            : 
     830                 :            : // assign pointer numbers to a def instruction
     831                 :     227855 : std::vector<int> LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) {
     832         [ +  + ]:     227855 :     if (isa<PointerType>(CurrentV->getType())) {
     833                 :      39871 :         auto it = S.AllPtrNumbering.find(CurrentV);
     834         [ -  + ]:      39871 :         if (it != S.AllPtrNumbering.end())
     835                 :          0 :             return std::vector<int>({it->second});
     836                 :            :     } else {
     837                 :     187984 :         auto it = S.AllCompositeNumbering.find(CurrentV);
     838         [ -  + ]:     187984 :         if (it != S.AllCompositeNumbering.end())
     839                 :          0 :             return it->second;
     840                 :            :     }
     841                 :            : 
     842                 :     455710 :     std::vector<int> Numbers;
     843                 :     227855 :     auto tracked = CountTrackedPointers(CurrentV->getType());
     844         [ -  + ]:     227855 :     if (tracked.count == 0)
     845                 :          0 :         return Numbers;
     846   [ +  +  +  -  :     429073 :     if (isa<Constant>(CurrentV) || isa<AllocaInst>(CurrentV) || isa<Argument>(CurrentV) ||
          +  -  -  +  +  
                      + ]
     847         [ -  - ]:     201218 :             (isa<AddrSpaceCastInst>(CurrentV) && !isTrackedValue(CurrentV))) {
     848                 :      26637 :         Numbers.resize(tracked.count, -1);
     849                 :            :     }
     850         [ -  + ]:     201218 :     else if (auto *SVI = dyn_cast<ShuffleVectorInst>(CurrentV)) {
     851                 :          0 :         std::vector<int> Numbers1 = NumberAll(S, SVI->getOperand(0));
     852                 :          0 :         std::vector<int> Numbers2 = NumberAll(S, SVI->getOperand(1));
     853                 :          0 :         auto Mask = SVI->getShuffleMask();
     854         [ #  # ]:          0 :         for (auto idx : Mask) {
     855         [ #  # ]:          0 :             if (idx == -1) {
     856                 :          0 :                 Numbers.push_back(-1);
     857         [ #  # ]:          0 :             } else if ((unsigned)idx < Numbers1.size()) {
     858                 :          0 :                 Numbers.push_back(Numbers1.at(idx));
     859                 :            :             } else {
     860                 :          0 :                 Numbers.push_back(Numbers2.at(idx - Numbers1.size()));
     861                 :            :             }
     862                 :            :         }
     863         [ -  + ]:     201218 :     } else if (auto *IEI = dyn_cast<InsertElementInst>(CurrentV)) {
     864                 :            :         // TODO: handle non-constant: LiftInsertElement(S, IEI)
     865                 :          0 :         unsigned idx = cast<ConstantInt>(IEI->getOperand(2))->getZExtValue();
     866                 :          0 :         Numbers = NumberAll(S, IEI->getOperand(0));
     867                 :          0 :         int ElNumber = Number(S, IEI->getOperand(1));
     868                 :          0 :         Numbers[idx] = ElNumber;
     869         [ +  + ]:     201218 :     } else if (auto *IVI = dyn_cast<InsertValueInst>(CurrentV)) {
     870                 :      96834 :         Numbers = NumberAll(S, IVI->getAggregateOperand());
     871                 :     193668 :         auto Tracked = TrackCompositeType(IVI->getType());
     872         [ -  + ]:      96834 :         assert(Tracked.size() == Numbers.size());
     873                 :     193668 :         std::vector<int> InsertNumbers = NumberAll(S, IVI->getInsertedValueOperand());
     874                 :      96834 :         auto Idxs = IVI->getIndices();
     875                 :      96834 :         unsigned j = 0;
     876         [ +  + ]:     480157 :         for (unsigned i = 0; i < Tracked.size(); ++i) {
     877                 :     383323 :             auto Elem = makeArrayRef(Tracked[i]);
     878         [ -  + ]:     383323 :             if (Elem.size() < Idxs.size())
     879                 :          0 :                 continue;
     880         [ +  + ]:     383323 :             if (Idxs.equals(Elem.slice(0, Idxs.size()))) // Tracked.startswith(Idxs)
     881                 :      81196 :                 Numbers[i] = InsertNumbers[j++];
     882                 :            :         }
     883         [ -  + ]:      96834 :         assert(j == InsertNumbers.size());
     884         [ +  + ]:     104384 :     } else if (auto *EVI = dyn_cast<ExtractValueInst>(CurrentV)) {
     885                 :      90540 :         auto BaseNumbers = NumberAll(S, EVI->getAggregateOperand());
     886                 :      45270 :         auto Tracked = TrackCompositeType(EVI->getAggregateOperand()->getType());
     887         [ -  + ]:      45270 :         assert(Tracked.size() == BaseNumbers.size());
     888                 :      45270 :         auto Idxs = EVI->getIndices();
     889         [ +  + ]:     235740 :         for (unsigned i = 0; i < Tracked.size(); ++i) {
     890                 :     190470 :             auto Elem = makeArrayRef(Tracked[i]);
     891         [ +  + ]:     190470 :             if (Elem.size() < Idxs.size())
     892                 :       6929 :                 continue;
     893         [ +  + ]:     183541 :             if (Idxs.equals(Elem.slice(0, Idxs.size()))) // Tracked.startswith(Idxs)
     894                 :      61507 :                 Numbers.push_back(BaseNumbers[i]);
     895                 :            :         }
     896         [ -  + ]:      45270 :         assert(CountTrackedPointers(EVI->getType()).count == Numbers.size());
     897         [ -  + ]:      59114 :     } else if (tracked.derived) {
     898         [ #  # ]:          0 :         if (isa<SelectInst>(CurrentV)) {
     899                 :          0 :             LiftSelect(S, cast<SelectInst>(CurrentV));
     900         [ #  # ]:          0 :         } else if (isa<PHINode>(CurrentV)) {
     901                 :          0 :             LiftPhi(S, cast<PHINode>(CurrentV));
     902                 :            :         // } else if (isa<ExtractElementInst>(CurrentV)) { // TODO: lifting for non constant index
     903                 :            :         } else {
     904                 :          0 :             CurrentV->print(errs());
     905                 :          0 :             llvm_unreachable("Unexpected generating operation for derived values");
     906                 :            :         }
     907         [ #  # ]:          0 :         if (isa<PointerType>(CurrentV->getType())) {
     908                 :          0 :             auto Number = S.AllPtrNumbering.at(CurrentV);
     909                 :          0 :             Numbers.resize(1, Number);
     910                 :            :         } else {
     911                 :          0 :             Numbers = S.AllCompositeNumbering.at(CurrentV);
     912                 :            :         }
     913                 :            :     } else {
     914   [ +  +  +  +  :      59114 :         assert((isa<LoadInst>(CurrentV) || isa<CallInst>(CurrentV) || isa<PHINode>(CurrentV) || isa<SelectInst>(CurrentV) ||
          +  +  +  +  -  
                +  -  - ]
     915                 :            :                 isa<AtomicCmpXchgInst>(CurrentV) || isa<AtomicRMWInst>(CurrentV))
     916                 :            :                 && "unexpected def expression");
     917                 :            :         // This is simple, we can just number them sequentially
     918         [ +  + ]:     230463 :         for (unsigned i = 0; i < tracked.count; ++i) {
     919                 :     171349 :             int Num = ++S.MaxPtrNumber;
     920                 :     171349 :             Numbers.push_back(Num);
     921                 :     171349 :             S.ReversePtrNumbering[Num] = CurrentV;
     922                 :            :         }
     923                 :            :     }
     924         [ +  + ]:     227855 :     if (isa<PointerType>(CurrentV->getType())) {
     925         [ -  + ]:      39871 :         assert(Numbers.size() == 1);
     926                 :      39871 :         S.AllPtrNumbering[CurrentV] = Numbers[0];
     927                 :            :     } else {
     928                 :     187984 :         S.AllCompositeNumbering[CurrentV] = Numbers;
     929                 :            :     }
     930                 :     227855 :     return Numbers;
     931                 :            : }
     932                 :            : 
     933                 :            : // gets the pointer number for every gc tracked value inside V
     934                 :    2115940 : std::vector<int> LateLowerGCFrame::NumberAll(State &S, Value *V) {
     935         [ +  + ]:    2115940 :     if (isa<PointerType>(V->getType())) {
     936                 :      61604 :         auto it = S.AllPtrNumbering.find(V);
     937         [ +  + ]:      61604 :         if (it != S.AllPtrNumbering.end())
     938                 :      27391 :             return std::vector<int>({it->second});
     939                 :            :     } else {
     940                 :    2054340 :         auto it = S.AllCompositeNumbering.find(V);
     941         [ +  + ]:    2054340 :         if (it != S.AllCompositeNumbering.end())
     942                 :     121020 :             return it->second;
     943                 :            :     }
     944                 :    3935060 :     std::vector<int> Numbers;
     945                 :    1967530 :     auto tracked = CountTrackedPointers(V->getType());
     946         [ +  + ]:    1967530 :     if (tracked.count == 0)
     947                 :    1745330 :         return Numbers;
     948                 :     222197 :     auto CurrentV = FindBaseValue(S, V);
     949                 :     222197 :     int Number = -1;
     950         [ +  + ]:     222197 :     if (isa<PointerType>(CurrentV.first->getType())) {
     951                 :            :         // Base turned out to be a single pointer--number it
     952         [ -  + ]:      34213 :         assert(CurrentV.second == -1);
     953                 :      34213 :         Number = NumberBase(S, CurrentV.first);
     954                 :      34213 :         Numbers.resize(tracked.count, Number);
     955                 :            :     } else {
     956                 :            :         // Base turned out to be an aggregate--get all numbers for it, then sub-select
     957                 :     187984 :         Numbers = NumberAllBase(S, CurrentV.first);
     958         [ -  + ]:     187984 :         if (CurrentV.second != -1) {
     959                 :          0 :             Number = Numbers[CurrentV.second]; // only needed a subset of the values
     960                 :          0 :             Numbers.resize(tracked.count, Number);
     961                 :            :         }
     962                 :            :         else {
     963         [ -  + ]:     187984 :             assert(!isa<PointerType>(V->getType()));
     964                 :            :         }
     965                 :            :     }
     966         [ -  + ]:     222197 :     if (CurrentV.first != V) {
     967         [ #  # ]:          0 :         if (isa<PointerType>(V->getType())) {
     968                 :          0 :             S.AllPtrNumbering[V] = Number;
     969                 :            :         } else {
     970                 :          0 :             S.AllCompositeNumbering[V] = Numbers;
     971                 :            :         }
     972                 :            :     }
     973                 :     222197 :     return Numbers;
     974                 :            : }
     975                 :            : 
     976                 :            : 
     977                 :   11207400 : static void MaybeResize(BBState &BBS, unsigned Idx) {
     978         [ +  + ]:   11207400 :     if (BBS.Defs.size() <= Idx) {
     979                 :    7419290 :         BBS.Defs.resize(Idx + 1);
     980                 :    7419290 :         BBS.UpExposedUses.resize(Idx + 1);
     981                 :    7419290 :         BBS.PhiOuts.resize(Idx + 1);
     982                 :            :     }
     983                 :   11207400 : }
     984                 :            : 
     985                 :  151605000 : static bool HasBitSet(const BitVector &BV, unsigned Bit) {
     986   [ +  +  +  + ]:  151605000 :     return Bit < BV.size() && BV[Bit];
     987                 :            : }
     988                 :            : 
     989                 :    1946760 : static void NoteDef(State &S, BBState &BBS, int Num, const std::vector<int> &SafepointsSoFar) {
     990         [ -  + ]:    1946760 :     assert(Num >= 0);
     991                 :    1946760 :     MaybeResize(BBS, Num);
     992         [ +  - ]:    1946760 :     assert(BBS.Defs[Num] == 0 && "SSA Violation or misnumbering?");
     993                 :    1946760 :     BBS.Defs[Num] = 1;
     994                 :    1946760 :     BBS.UpExposedUses[Num] = 0;
     995                 :            :     // This value could potentially be live at any following safe point
     996                 :            :     // if it ends up live out, so add it to the LiveIfLiveOut lists for all
     997                 :            :     // following safepoints.
     998         [ +  + ]:    3430190 :     for (int Safepoint : SafepointsSoFar) {
     999                 :    1483430 :         S.LiveIfLiveOut[Safepoint].push_back(Num);
    1000                 :            :     }
    1001                 :    1946760 : }
    1002                 :            : 
    1003                 :    5526100 : void LateLowerGCFrame::MaybeNoteDef(State &S, BBState &BBS, Value *Def, const std::vector<int> &SafepointsSoFar, SmallVector<int, 1> &&RefinedPtr) {
    1004                 :    5526100 :     Type *RT = Def->getType();
    1005         [ +  + ]:    5526100 :     if (isa<PointerType>(RT)) {
    1006         [ +  + ]:    4269090 :         if (!isSpecialPtr(RT))
    1007                 :    2493680 :             return;
    1008         [ +  - ]:    1775410 :         assert(isTrackedValue(Def) && "Returned value of GC interest, but not tracked?");
    1009                 :    1775410 :         int Num = Number(S, Def);
    1010                 :    1775410 :         NoteDef(S, BBS, Num, SafepointsSoFar);
    1011         [ +  + ]:    1775410 :         if (!RefinedPtr.empty())
    1012                 :     895759 :             S.Refinements[Num] = std::move(RefinedPtr);
    1013                 :            :     }
    1014                 :            :     else {
    1015                 :    2514030 :         std::vector<int> Nums = NumberAll(S, Def);
    1016         [ +  + ]:    1428360 :         for (int Num : Nums) {
    1017                 :     171349 :             NoteDef(S, BBS, Num, SafepointsSoFar);
    1018         [ +  + ]:     171349 :             if (!RefinedPtr.empty())
    1019                 :      12631 :                 S.Refinements[Num] = RefinedPtr;
    1020                 :            :         }
    1021                 :            :     }
    1022                 :            : }
    1023                 :            : 
    1024                 :     823932 : static int NoteSafepoint(State &S, BBState &BBS, CallInst *CI, std::vector<int> CalleeRoots) {
    1025                 :     823932 :     int Number = ++S.MaxSafepointNumber;
    1026                 :     823932 :     S.SafepointNumbering[CI] = Number;
    1027                 :     823932 :     S.ReverseSafepointNumbering.push_back(CI);
    1028                 :            :     // Note which pointers are upward exposed live here. They need to be
    1029                 :            :     // considered live at this safepoint even when they have a def earlier
    1030                 :            :     // in this BB (i.e. even when they don't participate in the dataflow
    1031                 :            :     // computation)
    1032                 :     823932 :     S.LiveSets.push_back(BBS.UpExposedUses);
    1033                 :     823932 :     S.LiveIfLiveOut.push_back(std::vector<int>{});
    1034                 :     823932 :     S.CalleeRoots.push_back(std::move(CalleeRoots));
    1035                 :     823932 :     return Number;
    1036                 :            : }
    1037                 :            : 
    1038                 :    9383070 : void LateLowerGCFrame::NoteUse(State &S, BBState &BBS, Value *V, BitVector &Uses) {
    1039                 :            :     // Short circuit to avoid having to deal with vectors of constants, etc.
    1040         [ +  + ]:    9383070 :     if (isa<Constant>(V))
    1041                 :    4654080 :         return;
    1042         [ +  + ]:    4728990 :     if (isa<PointerType>(V->getType())) {
    1043         [ +  + ]:    4124260 :         if (isSpecialPtr(V->getType())) {
    1044                 :    2770160 :             int Num = Number(S, V);
    1045         [ +  + ]:    2770160 :             if (Num < 0)
    1046                 :     251674 :                 return;
    1047                 :    2518490 :             MaybeResize(BBS, Num);
    1048                 :    2518490 :             Uses[Num] = 1;
    1049                 :            :         }
    1050                 :            :     } else {
    1051                 :    1209470 :         std::vector<int> Nums = NumberAll(S, V);
    1052         [ +  + ]:     768933 :         for (int Num : Nums) {
    1053         [ +  + ]:     164198 :             if (Num < 0)
    1054                 :       6783 :                 continue;
    1055                 :     157415 :             MaybeResize(BBS, Num);
    1056                 :     157415 :             Uses[Num] = 1;
    1057                 :            :         }
    1058                 :            :     }
    1059                 :            : }
    1060                 :            : 
    1061                 :    5797270 : void LateLowerGCFrame::NoteOperandUses(State &S, BBState &BBS, User &UI) {
    1062         [ +  + ]:   14840200 :     for (Use &U : UI.operands()) {
    1063                 :    9042910 :         NoteUse(S, BBS, U);
    1064                 :            :     }
    1065                 :    5797270 : }
    1066                 :            : 
    1067                 :            : template <typename VisitInst, typename callback>
    1068                 :     109048 : void RecursivelyVisit(callback f, Value *V) {
    1069         [ +  + ]:     620406 :     for (Use &VU : V->uses()) {
    1070                 :     511358 :         User *TheUser = VU.getUser();
    1071         [ +  + ]:     511358 :         if (isa<VisitInst>(TheUser))
    1072                 :       4761 :             f(VU);
    1073   [ +  +  +  - ]:    1169880 :         if (isa<CallInst>(TheUser) || isa<LoadInst>(TheUser) ||
    1074   [ +  -  +  + ]:     518787 :             isa<SelectInst>(TheUser) || isa<PHINode>(TheUser) || // TODO: should these be removed from this list?
    1075   [ +  -  +  + ]:     209433 :             isa<StoreInst>(TheUser) || isa<PtrToIntInst>(TheUser) ||
    1076         [ +  - ]:      35495 :             isa<ICmpInst>(TheUser) || // ICmpEQ/ICmpNE can be used with ptr types
    1077   [ +  +  -  +  :    1014190 :             isa<AtomicCmpXchgInst>(TheUser) || isa<AtomicRMWInst>(TheUser))
                   +  + ]
    1078                 :     494115 :             continue;
    1079   [ +  +  +  +  :      17243 :         if (isa<GetElementPtrInst>(TheUser) || isa<BitCastInst>(TheUser) || isa<AddrSpaceCastInst>(TheUser)) {
             +  -  +  - ]
    1080                 :      17243 :             RecursivelyVisit<VisitInst, callback>(f, TheUser);
    1081                 :      17243 :             continue;
    1082                 :            :         }
    1083                 :          0 :         llvm_dump(V);
    1084                 :          0 :         llvm_dump(TheUser);
    1085                 :          0 :         assert(false && "Unexpected instruction");
    1086                 :            :     }
    1087                 :     109048 : }
    1088                 :            : 
    1089                 :          0 : static void dumpBitVectorValues(State &S, BitVector &BV) {
    1090                 :          0 :     bool first = true;
    1091         [ #  # ]:          0 :     for (int Idx = BV.find_first(); Idx >= 0; Idx = BV.find_next(Idx)) {
    1092         [ #  # ]:          0 :         if (!first)
    1093                 :          0 :             dbgs() << ", ";
    1094                 :          0 :         first = false;
    1095                 :          0 :         S.ReversePtrNumbering[Idx]->printAsOperand(dbgs());
    1096                 :            :     }
    1097                 :          0 : }
    1098                 :            : 
    1099                 :            : /* Debugging utility to dump liveness information */
    1100                 :          0 : JL_USED_FUNC static void dumpLivenessState(Function &F, State &S) {
    1101         [ #  # ]:          0 :     for (auto &BB : F) {
    1102                 :          0 :         dbgs() << "Liveness analysis for BB " << BB.getName();
    1103                 :          0 :         dbgs() << "\n\tDefs: ";
    1104                 :          0 :         dumpBitVectorValues(S, S.BBStates[&BB].Defs);
    1105                 :          0 :         dbgs() << "\n\tPhiOuts: ";
    1106                 :          0 :         dumpBitVectorValues(S, S.BBStates[&BB].PhiOuts);
    1107                 :          0 :         dbgs() << "\n\tUpExposedUses: ";
    1108                 :          0 :         dumpBitVectorValues(S, S.BBStates[&BB].UpExposedUses);
    1109                 :          0 :         dbgs() << "\n\tLiveIn: ";
    1110                 :          0 :         dumpBitVectorValues(S, S.BBStates[&BB].LiveIn);
    1111                 :          0 :         dbgs() << "\n\tLiveOut: ";
    1112                 :          0 :         dumpBitVectorValues(S, S.BBStates[&BB].LiveOut);
    1113                 :          0 :         dbgs() << "\n";
    1114                 :            :     }
    1115                 :          0 : }
    1116                 :            : 
    1117                 :    7745360 : static bool isTBAA(MDNode *TBAA, std::initializer_list<const char*> const strset)
    1118                 :            : {
    1119         [ +  + ]:    7745360 :     if (!TBAA)
    1120                 :     890362 :         return false;
    1121         [ +  + ]:   13012900 :     while (TBAA->getNumOperands() > 1) {
    1122                 :   11497100 :         TBAA = cast<MDNode>(TBAA->getOperand(1).get());
    1123                 :   11497100 :         auto str = cast<MDString>(TBAA->getOperand(0))->getString();
    1124         [ +  + ]:   32668800 :         for (auto str2 : strset) {
    1125         [ +  + ]:   26510800 :             if (str == str2) {
    1126                 :    5339150 :                 return true;
    1127                 :            :             }
    1128                 :            :         }
    1129                 :            :     }
    1130                 :    1515850 :     return false;
    1131                 :            : }
    1132                 :            : 
    1133                 :            : // Check if this is a load from an immutable value. The easiest
    1134                 :            : // way to do so is to look at the tbaa and see if it derives from
    1135                 :            : // jtbaa_immut.
    1136                 :    3805600 : static bool isLoadFromImmut(LoadInst *LI)
    1137                 :            : {
    1138         [ +  + ]:    3805600 :     if (LI->getMetadata(LLVMContext::MD_invariant_load))
    1139                 :    2539140 :         return true;
    1140                 :    1266460 :     MDNode *TBAA = LI->getMetadata(LLVMContext::MD_tbaa);
    1141         [ +  + ]:    1266460 :     if (isTBAA(TBAA, {"jtbaa_immut", "jtbaa_const", "jtbaa_datatype"}))
    1142                 :      59731 :         return true;
    1143                 :    1206720 :     return false;
    1144                 :            : }
    1145                 :            : 
    1146                 :      66184 : static bool isConstGV(GlobalVariable *gv)
    1147                 :            : {
    1148   [ +  +  +  + ]:      66184 :     return gv->isConstant() || gv->getMetadata("julia.constgv");
    1149                 :            : }
    1150                 :            : 
    1151                 :            : typedef llvm::SmallPtrSet<PHINode*, 1> PhiSet;
    1152                 :            : 
    1153                 :            : static bool isLoadFromConstGV(LoadInst *LI, bool &task_local, PhiSet *seen = nullptr);
    1154                 :      27911 : static bool isLoadFromConstGV(Value *v, bool &task_local, PhiSet *seen = nullptr)
    1155                 :            : {
    1156                 :      27911 :     v = v->stripInBoundsOffsets();
    1157         [ +  + ]:      27911 :     if (auto LI = dyn_cast<LoadInst>(v))
    1158                 :       3940 :         return isLoadFromConstGV(LI, task_local, seen);
    1159         [ +  + ]:      23971 :     if (auto gv = dyn_cast<GlobalVariable>(v))
    1160                 :       8480 :         return isConstGV(gv);
    1161                 :            :     // null pointer
    1162         [ +  + ]:      15491 :     if (isa<ConstantData>(v))
    1163                 :       4318 :         return true;
    1164                 :            :     // literal pointers
    1165         [ +  + ]:      11173 :     if (auto CE = dyn_cast<ConstantExpr>(v))
    1166   [ +  -  +  - ]:       3446 :         return (CE->getOpcode() == Instruction::IntToPtr &&
    1167                 :       3446 :                 isa<ConstantData>(CE->getOperand(0)));
    1168         [ +  + ]:       9450 :     if (auto SL = dyn_cast<SelectInst>(v))
    1169   [ +  -  +  - ]:      16960 :         return (isLoadFromConstGV(SL->getTrueValue(), task_local, seen) &&
    1170                 :      16960 :                 isLoadFromConstGV(SL->getFalseValue(), task_local, seen));
    1171         [ -  + ]:        970 :     if (auto Phi = dyn_cast<PHINode>(v)) {
    1172                 :          0 :         PhiSet ThisSet(&Phi, &Phi);
    1173         [ #  # ]:          0 :         if (!seen)
    1174                 :          0 :             seen = &ThisSet;
    1175         [ #  # ]:          0 :         else if (seen->count(Phi))
    1176                 :          0 :             return true;
    1177                 :            :         else
    1178                 :          0 :             seen->insert(Phi);
    1179                 :          0 :         auto n = Phi->getNumIncomingValues();
    1180         [ #  # ]:          0 :         for (unsigned i = 0; i < n; ++i) {
    1181         [ #  # ]:          0 :             if (!isLoadFromConstGV(Phi->getIncomingValue(i), task_local, seen)) {
    1182                 :          0 :                 return false;
    1183                 :            :             }
    1184                 :            :         }
    1185                 :          0 :         return true;
    1186                 :            :     }
    1187         [ +  - ]:        970 :     if (auto call = dyn_cast<CallInst>(v)) {
    1188                 :        970 :         auto callee = call->getCalledFunction();
    1189   [ +  -  +  +  :        970 :         if (callee && callee->getName() == "julia.typeof") {
                   +  + ]
    1190                 :        944 :             return true;
    1191                 :            :         }
    1192   [ +  -  -  +  :         26 :         if (callee && callee->getName() == "julia.get_pgcstack") {
                   -  + ]
    1193                 :          0 :             task_local = true;
    1194                 :          0 :             return true;
    1195                 :            :         }
    1196                 :            :     }
    1197         [ -  + ]:         26 :     if (isa<Argument>(v)) {
    1198                 :          0 :         task_local = true;
    1199                 :          0 :         return true;
    1200                 :            :     }
    1201                 :         26 :     return false;
    1202                 :            : }
    1203                 :            : 
    1204                 :            : // Check if this is can be traced through constant loads to an constant global
    1205                 :            : // or otherwise globally rooted value.
    1206                 :            : // Almost all `tbaa_const` loads satisfies this with the exception of
    1207                 :            : // task local constants which are constant as far as the code is concerned but aren't
    1208                 :            : // global constants. For task local constant `task_local` will be true when this function
    1209                 :            : // returns.
    1210                 :            : //
    1211                 :            : // The white list implemented here and above in `isLoadFromConstGV(Value*)` should
    1212                 :            : // cover all the cases we and LLVM generates.
    1213                 :    3939660 : static bool isLoadFromConstGV(LoadInst *LI, bool &task_local, PhiSet *seen)
    1214                 :            : {
    1215                 :            :     // We only emit single slot GV in codegen
    1216                 :            :     // but LLVM global merging can change the pointer operands to GEPs/bitcasts
    1217                 :    3939660 :     auto load_base = LI->getPointerOperand()->stripInBoundsOffsets();
    1218                 :    3939660 :     auto gv = dyn_cast<GlobalVariable>(load_base);
    1219         [ +  + ]:    3939660 :     if (isTBAA(LI->getMetadata(LLVMContext::MD_tbaa),
    1220                 :            :                {"jtbaa_immut", "jtbaa_const", "jtbaa_datatype"})) {
    1221         [ +  + ]:    2740180 :         if (gv)
    1222                 :    2729230 :             return true;
    1223                 :      10951 :         return isLoadFromConstGV(load_base, task_local, seen);
    1224                 :            :     }
    1225         [ +  + ]:    1199480 :     if (gv)
    1226                 :      57704 :         return isConstGV(gv);
    1227                 :    1141780 :     return false;
    1228                 :            : }
    1229                 :            : 
    1230                 :      39913 : static uint64_t getLoadValueAlign(LoadInst *LI)
    1231                 :            : {
    1232                 :      39913 :     MDNode *md = LI->getMetadata(LLVMContext::MD_align);
    1233         [ -  + ]:      39913 :     if (!md)
    1234                 :          0 :         return 1;
    1235                 :      39913 :     return mdconst::extract<ConstantInt>(md->getOperand(0))->getLimitedValue();
    1236                 :            : }
    1237                 :            : 
    1238                 :     801964 : static bool LooksLikeFrameRef(Value *V) {
    1239         [ +  + ]:     801964 :     if (isSpecialPtr(V->getType()))
    1240                 :     321739 :         return false;
    1241                 :     480225 :     V = V->stripInBoundsOffsets();
    1242         [ -  + ]:     480225 :     if (isSpecialPtr(V->getType()))
    1243                 :          0 :         return false;
    1244                 :     480225 :     return isa<Argument>(V);
    1245                 :            : }
    1246                 :            : 
    1247                 :     119969 : SmallVector<int, 1> LateLowerGCFrame::GetPHIRefinements(PHINode *Phi, State &S)
    1248                 :            : {
    1249                 :            :     // The returned vector can violate the domination property of the Refinements map.
    1250                 :            :     // However, we can't know for sure if this is valid here since incoming values
    1251                 :            :     // that does not dominate the PHI node may be externally rooted (i.e. can be refined to -1)
    1252                 :            :     // We only know that after scanning the whole function so we'll record the possibly invalid
    1253                 :            :     // edges here and fix them up at the end of `LocalScan`. (See `FixUpRefinements` below).
    1254                 :     119969 :     auto nIncoming = Phi->getNumIncomingValues();
    1255                 :     119969 :     SmallVector<int, 1> RefinedPtr(nIncoming);
    1256         [ +  + ]:     419759 :     for (unsigned i = 0; i < nIncoming; ++i)
    1257                 :     299790 :         RefinedPtr[i] = Number(S, Phi->getIncomingValue(i));
    1258                 :     119969 :     return RefinedPtr;
    1259                 :            : }
    1260                 :            : 
    1261                 :          0 : JL_USED_FUNC static void DumpRefinements(State *S)
    1262                 :            : {
    1263         [ #  # ]:          0 :     for (auto &kv: S->Refinements) {
    1264                 :          0 :         int Num = kv.first;
    1265         [ #  # ]:          0 :         if (Num < 0)
    1266                 :          0 :             continue;
    1267                 :          0 :         dbgs() << "Refinements for " << Num << "  --  ";
    1268                 :          0 :         auto V = S->ReversePtrNumbering[Num];
    1269                 :          0 :         llvm_dump(V);
    1270         [ #  # ]:          0 :         for (auto refine: kv.second) {
    1271         [ #  # ]:          0 :             if (refine < 0) {
    1272                 :          0 :                 dbgs() << "  " << (int)refine;
    1273                 :          0 :                 continue;
    1274                 :            :             }
    1275                 :          0 :             dbgs() << "  " << (int)refine << ": ";
    1276                 :          0 :             auto R = S->ReversePtrNumbering[refine];
    1277                 :          0 :             llvm_dump(R);
    1278                 :            :         }
    1279                 :            :     }
    1280                 :          0 : }
    1281                 :            : 
    1282                 :     131071 : void LateLowerGCFrame::FixUpRefinements(ArrayRef<int> PHINumbers, State &S)
    1283                 :            : {
    1284                 :            :     // Now we have all the possible refinement information, we can remove ones for the invalid
    1285                 :            : 
    1286                 :            :     // * First find all values that must be externally rooted.
    1287                 :            :     //   Values may either be obviously externally rooted (e.g. arguments) - (this is indicated by a
    1288                 :            :     //   value of -1 or -2 in the refinement map), or may be externally rooted by refinement to other
    1289                 :            :     //   values. Thus a value is not externally rooted if it either:
    1290                 :            :     //   either:
    1291                 :            :     //     - Has no refinements (all obiviously externally rooted values are annotated by -1/-2 in the
    1292                 :            :     //       refinement map).
    1293                 :            :     //     - Recursively reaches a not-externally rooted value through its refinements
    1294                 :            :     //
    1295                 :            :     //   We compute this set by first assuming all values are externally rooted and then iteratively
    1296                 :            :     //   removing the ones that are not.
    1297                 :     262142 :     BitVector extern_rooted(S.MaxPtrNumber + 1, true);
    1298                 :     262142 :     BitVector perm_rooted(S.MaxPtrNumber + 1, true);
    1299                 :            : 
    1300                 :            :     //   * First clear all values that are not derived from anything.
    1301                 :            :     //     This only needs to be done once.
    1302         [ +  + ]:    2077830 :     for (int i = 0; i <= S.MaxPtrNumber; i++) {
    1303                 :    1946760 :         auto it = S.Refinements.find(i);
    1304   [ +  +  -  +  :    1946760 :         if (it == S.Refinements.end() || it->second.empty()) {
                   +  + ]
    1305                 :    1038370 :             extern_rooted[i] = false;
    1306                 :    1038370 :             perm_rooted[i] = false;
    1307                 :            :         }
    1308                 :            :     }
    1309                 :            :     //   * Then remove values reachable from those values recursively
    1310                 :            :     bool changed;
    1311         [ +  + ]:     228194 :     do {
    1312                 :     228194 :         changed = false;
    1313         [ +  + ]:    2141100 :         for (auto &kv: S.Refinements) {
    1314                 :    1912910 :             int Num = kv.first;
    1315                 :            :             // Already cleared.
    1316         [ +  + ]:    1912910 :             if (!HasBitSet(extern_rooted, Num))
    1317                 :     182809 :                 continue;
    1318         [ +  + ]:    3427850 :             for (auto refine: kv.second) {
    1319         [ +  + ]:    1841120 :                 if (refine == -2) {
    1320                 :    1165130 :                     continue;
    1321                 :            :                 }
    1322         [ +  + ]:     675990 :                 else if (refine == -1) {
    1323         [ +  + ]:     422339 :                     if (HasBitSet(perm_rooted, Num)) {
    1324                 :     205595 :                         changed = true;
    1325                 :     205595 :                         perm_rooted[Num] = false;
    1326                 :            :                     }
    1327                 :     422339 :                     continue;
    1328                 :            :                 }
    1329         [ +  + ]:     253651 :                 else if (!HasBitSet(extern_rooted, refine)) {
    1330                 :     143376 :                     changed = true;
    1331                 :     143376 :                     extern_rooted[Num] = false;
    1332                 :     143376 :                     perm_rooted[Num] = false;
    1333                 :     143376 :                     break;
    1334                 :            :                 }
    1335         [ +  + ]:     110275 :                 else if (!HasBitSet(perm_rooted, refine)) {
    1336         [ +  + ]:      38622 :                     if (HasBitSet(perm_rooted, Num)) {
    1337                 :      13168 :                         changed = true;
    1338                 :      13168 :                         perm_rooted[Num] = false;
    1339                 :            :                     }
    1340                 :            :                 }
    1341                 :            :             }
    1342                 :            :         }
    1343                 :            :     } while (changed);
    1344                 :            :     //   * Now the `extern_rooted` and `perm_rooted` map is accurate,
    1345                 :            :     //     normalize all externally rooted values.
    1346         [ +  + ]:    1039460 :     for (auto &kv: S.Refinements) {
    1347                 :     908390 :         int Num = kv.first;
    1348         [ +  + ]:     908390 :         if (HasBitSet(perm_rooted, Num)) {
    1349                 :            :             // For permanently rooted values, set their refinements simply to `{-2}`
    1350                 :     546947 :             kv.second.resize(1);
    1351                 :     546947 :             kv.second[0] = -2;
    1352                 :     546947 :             continue;
    1353                 :            :         }
    1354         [ +  + ]:     361443 :         else if (HasBitSet(extern_rooted, Num)) {
    1355                 :            :             // For externally rooted values, set their refinements simply to `{-1}`
    1356                 :     218067 :             kv.second.resize(1);
    1357                 :     218067 :             kv.second[0] = -1;
    1358                 :     218067 :             continue;
    1359                 :            :         }
    1360         [ +  + ]:     457068 :         for (auto &refine: kv.second) {
    1361                 :            :             // For other values,
    1362                 :            :             // remove all externally rooted values from their refinements (replace with -1)
    1363                 :            :             // No need to handle -2 specially since it won't make a difference.
    1364         [ +  + ]:     313692 :             if (HasBitSet(extern_rooted, refine)) {
    1365                 :      19561 :                 refine = -1;
    1366                 :            :             }
    1367                 :            :         }
    1368                 :            :     }
    1369                 :            :     // Scan all phi node refinements and remove all invalid ones.
    1370                 :            :     // As a generalization to what we did to externally rooted values above,
    1371                 :            :     // we can also relax non-dominating (invalid) refinements to the refinements of those values
    1372                 :            :     // If all of those values dominate the phi node then the phi node can be refined to
    1373                 :            :     // those values instead.
    1374                 :            :     // While we recursively relax the refinement, we need to keep track of the values we've
    1375                 :            :     // visited in order to not scan them again.
    1376                 :     262142 :     BitVector visited(S.MaxPtrNumber + 1, false);
    1377         [ +  + ]:     280670 :     for (auto Num: PHINumbers) {
    1378                 :            :         // Not sure if `Num` can be `-1`
    1379   [ +  -  +  +  :     149599 :         if (Num < 0 || HasBitSet(extern_rooted, Num))
                   +  + ]
    1380                 :      16610 :             continue;
    1381                 :            :         // N.B.: We reset the bit vector below on every iteration
    1382                 :     132989 :         visited[Num] = true;
    1383                 :     132989 :         auto Phi = cast<PHINode>(S.ReversePtrNumbering[Num]);
    1384                 :     132989 :         auto &RefinedPtr = S.Refinements[Num];
    1385                 :     132989 :         unsigned j = 0; // new length
    1386         [ +  + ]:     270093 :         for (unsigned i = 0; i < RefinedPtr.size(); i++) {
    1387                 :     214143 :             auto refine = RefinedPtr[i];
    1388   [ +  +  +  +  :     214143 :             if (refine < 0 || visited[refine])
                   +  + ]
    1389                 :     137104 :                 continue;
    1390                 :     157389 :             visited[refine] = true;
    1391         [ +  + ]:     157389 :             if (i != j)
    1392                 :      25986 :                 RefinedPtr[j] = refine;
    1393                 :     157389 :             j++;
    1394         [ +  - ]:     157389 :             if (auto inst = dyn_cast<Instruction>(S.ReversePtrNumbering[refine])) {
    1395         [ +  + ]:     157389 :                 if (!S.DT)
    1396                 :      24567 :                     S.DT = &GetDT();
    1397         [ +  + ]:     157389 :                 if (S.DT->dominates(inst, Phi))
    1398                 :      80350 :                     continue;
    1399                 :            :                 // Decrement `j` so we'll overwrite/ignore it.
    1400                 :     113235 :                 j--;
    1401                 :            :                 // Non-dominating refinement
    1402                 :     113235 :                 auto it = S.Refinements.find(refine);
    1403   [ +  +  +  +  :     113235 :                 if (it != S.Refinements.end() && !it->second.empty()) {
                   +  + ]
    1404                 :            :                     // Found a replacement, replace current element.
    1405                 :      36196 :                     auto &NewRefinedPtr = it->second;
    1406                 :      36196 :                     unsigned n = NewRefinedPtr.size();
    1407                 :            :                     // First fill in the gap between `i` and `j`
    1408                 :      36196 :                     unsigned k = 0;
    1409   [ +  +  +  + ]:      75226 :                     for (; k < n && i >= j + k; k++)
    1410                 :      39030 :                         RefinedPtr[i - k] = NewRefinedPtr[k];
    1411                 :      36196 :                     i = i - k;
    1412         [ +  + ]:      36196 :                     if (k < n)
    1413                 :      11475 :                         RefinedPtr.append(it->second.begin() + k, it->second.end());
    1414                 :      36196 :                     continue;
    1415                 :            :                 }
    1416                 :            :                 // Invalid - Remove All refinements
    1417                 :      77039 :                 RefinedPtr.resize(0);
    1418                 :      77039 :                 break;
    1419                 :            :             }
    1420                 :            :         }
    1421         [ +  + ]:     132989 :         if (!RefinedPtr.empty()) {
    1422                 :            :             // `j == 0` here means that everything is externally rooted.
    1423                 :            :             // This should have been handled by the first loop above.
    1424   [ +  -  +  - ]:      26320 :             assert(j != 0 && j <= RefinedPtr.size());
    1425                 :      26320 :             RefinedPtr.resize(j);
    1426                 :            :         }
    1427                 :     132989 :         visited.reset();
    1428                 :            :     }
    1429                 :     131071 : }
    1430                 :            : 
    1431                 :     131071 : State LateLowerGCFrame::LocalScan(Function &F) {
    1432                 :     131071 :     State S(F);
    1433                 :     262142 :     SmallVector<int, 8> PHINumbers;
    1434         [ +  + ]:    1765730 :     for (BasicBlock &BB : F) {
    1435                 :    1634660 :         BBState &BBS = S.BBStates[&BB];
    1436         [ +  + ]:   23061900 :         for (auto it = BB.rbegin(); it != BB.rend(); ++it) {
    1437                 :   21427200 :             Instruction &I = *it;
    1438         [ +  + ]:   21427200 :             if (CallInst *CI = dyn_cast<CallInst>(&I)) {
    1439         [ +  + ]:    1484160 :                 if (isa<IntrinsicInst>(CI)) {
    1440                 :            :                     // Most intrinsics are not gc uses/defs, however some have
    1441                 :            :                     // memory operands and could thus be GC uses. To be conservative,
    1442                 :            :                     // we only skip processing for those that we know we emit often
    1443                 :            :                     // and cannot possibly be GC uses.
    1444                 :     335537 :                     IntrinsicInst *II = cast<IntrinsicInst>(CI);
    1445         [ +  + ]:     552443 :                     if (isa<DbgInfoIntrinsic>(CI) ||
    1446   [ +  +  +  +  :     552443 :                         II->getIntrinsicID() == Intrinsic::lifetime_start ||
                   +  + ]
    1447                 :     165130 :                         II->getIntrinsicID() == Intrinsic::lifetime_end) {
    1448                 :     660227 :                         continue;
    1449                 :            :                     }
    1450   [ +  -  -  +  :     270070 :                     if (II->getIntrinsicID() == Intrinsic::masked_load ||
                   -  + ]
    1451                 :     135035 :                         II->getIntrinsicID() == Intrinsic::masked_gather) {
    1452         [ #  # ]:          0 :                         if (auto VTy = dyn_cast<VectorType>(II->getType())) {
    1453         [ #  # ]:          0 :                             if (auto PtrT = dyn_cast<PointerType>(VTy->getElementType())) {
    1454         [ #  # ]:          0 :                                 if (isSpecialPtr(PtrT)) {
    1455                 :            :                                     // LLVM sometimes tries to materialize these operations with undefined pointers in our non-integral address space.
    1456                 :            :                                     // Hopefully LLVM didn't already propagate that information and poison our users. Set those to NULL now.
    1457                 :          0 :                                     Value *passthru = II->getArgOperand(3);
    1458         [ #  # ]:          0 :                                     if (isa<UndefValue>(passthru)) {
    1459                 :          0 :                                         II->setArgOperand(3, Constant::getNullValue(passthru->getType()));
    1460                 :            :                                     }
    1461         [ #  # ]:          0 :                                     if (PtrT->getAddressSpace() == AddressSpace::Loaded) {
    1462                 :            :                                         // These are not real defs
    1463                 :          0 :                                         continue;
    1464                 :            :                                     }
    1465                 :            :                                 }
    1466                 :            :                             }
    1467                 :            :                         }
    1468                 :            :                     }
    1469                 :            :                 }
    1470                 :    1283660 :                 auto callee = CI->getCalledFunction();
    1471   [ +  +  +  + ]:    1283660 :                 if (callee && callee == typeof_func) {
    1472                 :     125942 :                     MaybeNoteDef(S, BBS, CI, BBS.Safepoints, SmallVector<int, 1>{-2});
    1473                 :            :                 }
    1474                 :            :                 else {
    1475                 :    1157720 :                     MaybeNoteDef(S, BBS, CI, BBS.Safepoints);
    1476                 :            :                 }
    1477         [ +  + ]:    1283660 :                 if (CI->hasStructRetAttr()) {
    1478                 :      22406 :                     Type *ElT = getAttributeAtIndex(CI->getAttributes(), 1, Attribute::StructRet).getValueAsType();
    1479         [ -  + ]:      22406 :                     assert(cast<PointerType>(CI->getArgOperand(0)->getType())->isOpaqueOrPointeeTypeMatches(getAttributeAtIndex(CI->getAttributes(), 1, Attribute::StructRet).getValueAsType()));
    1480                 :      22406 :                     auto tracked = CountTrackedPointers(ElT);
    1481         [ +  + ]:      22406 :                     if (tracked.count) {
    1482                 :      13074 :                         AllocaInst *SRet = dyn_cast<AllocaInst>((CI->arg_begin()[0])->stripInBoundsOffsets());
    1483         [ -  + ]:      13074 :                         assert(SRet);
    1484                 :            :                         {
    1485   [ +  -  -  +  :      13074 :                             if (!(SRet->isStaticAlloca() && isa<PointerType>(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked)) {
             -  -  +  - ]
    1486         [ -  + ]:      13074 :                                 assert(!tracked.derived);
    1487         [ +  + ]:      13074 :                                 if (tracked.all) {
    1488                 :       5381 :                                     S.ArrayAllocas[SRet] = tracked.count * cast<ConstantInt>(SRet->getArraySize())->getZExtValue();
    1489                 :            :                                 }
    1490                 :            :                                 else {
    1491                 :       7693 :                                     Value *arg1 = (CI->arg_begin()[1])->stripInBoundsOffsets();
    1492                 :       7693 :                                     AllocaInst *SRet_gc = nullptr;
    1493         [ -  + ]:       7693 :                                     if (PHINode *Phi = dyn_cast<PHINode>(arg1)) {
    1494         [ #  # ]:          0 :                                         for (Value *V : Phi->incoming_values()) {
    1495         [ #  # ]:          0 :                                             if (AllocaInst *Alloca = dyn_cast<AllocaInst>(V->stripInBoundsOffsets())) {
    1496         [ #  # ]:          0 :                                                 if (SRet_gc == nullptr) {
    1497                 :          0 :                                                     SRet_gc = Alloca;
    1498         [ #  # ]:          0 :                                                 } else if (SRet_gc == Alloca) {
    1499                 :          0 :                                                     continue;
    1500                 :            :                                                 } else {
    1501                 :          0 :                                                     llvm_dump(Alloca);
    1502                 :          0 :                                                     llvm_dump(SRet_gc);
    1503                 :          0 :                                                     assert(false && "Allocas in Phi node should match");
    1504                 :            :                                                 }
    1505                 :            :                                             } else {
    1506                 :          0 :                                                 llvm_dump(V->stripInBoundsOffsets());
    1507                 :          0 :                                                 assert(false && "Expected alloca");
    1508                 :            :                                             }
    1509                 :            :                                         }
    1510                 :            :                                     } else {
    1511                 :       7693 :                                         SRet_gc = dyn_cast<AllocaInst>(arg1);
    1512                 :            :                                     }
    1513         [ -  + ]:       7693 :                                     if (!SRet_gc) {
    1514                 :          0 :                                         llvm_dump(CI);
    1515                 :          0 :                                         llvm_dump(arg1);
    1516                 :          0 :                                         assert(false && "Expected alloca");
    1517                 :            :                                     }
    1518                 :       7693 :                                     Type *ElT = SRet_gc->getAllocatedType();
    1519   [ +  -  -  +  :       7693 :                                     if (!(SRet_gc->isStaticAlloca() && isa<PointerType>(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked)) {
             -  -  +  - ]
    1520                 :       7693 :                                         S.ArrayAllocas[SRet_gc] = tracked.count * cast<ConstantInt>(SRet_gc->getArraySize())->getZExtValue();
    1521                 :            :                                     }
    1522                 :            :                                 }
    1523                 :            :                             }
    1524                 :            :                         }
    1525                 :            :                     }
    1526                 :            :                 }
    1527                 :    1283660 :                 NoteOperandUses(S, BBS, I);
    1528         [ +  + ]:    1283660 :                 if (CI->canReturnTwice()) {
    1529                 :       4838 :                     S.ReturnsTwice.push_back(CI);
    1530                 :            :                 }
    1531         [ +  + ]:    1283660 :                 if (callee) {
    1532         [ +  + ]:    1203220 :                     if (callee == gc_preserve_begin_func) {
    1533                 :      27874 :                         std::vector<int> args;
    1534         [ +  + ]:      28812 :                         for (Use &U : CI->args()) {
    1535                 :      14875 :                             Value *V = U;
    1536         [ -  + ]:      14875 :                             if (isa<Constant>(V))
    1537                 :          0 :                                 continue;
    1538         [ +  + ]:      14875 :                             if (isa<PointerType>(V->getType())) {
    1539         [ +  - ]:      14739 :                                 if (isSpecialPtr(V->getType())) {
    1540                 :      14739 :                                     int Num = Number(S, V);
    1541         [ +  - ]:      14739 :                                     if (Num >= 0)
    1542                 :      14739 :                                         args.push_back(Num);
    1543                 :            :                                 }
    1544                 :            :                             } else {
    1545                 :        272 :                                 std::vector<int> Nums = NumberAll(S, V);
    1546         [ +  + ]:        272 :                                 for (int Num : Nums) {
    1547         [ -  + ]:        136 :                                     if (Num < 0)
    1548                 :          0 :                                         continue;
    1549                 :        136 :                                     args.push_back(Num);
    1550                 :            :                                 }
    1551                 :            :                             }
    1552                 :            :                         }
    1553                 :      13937 :                         S.GCPreserves[CI] = args;
    1554                 :      13937 :                         continue;
    1555                 :            :                     }
    1556                 :            :                     // Known functions emitted in codegen that are not safepoints
    1557         [ +  - ]:    1156260 :                     if (callee == pointer_from_objref_func || callee == gc_preserve_begin_func ||
    1558   [ +  +  +  + ]:    1156260 :                         callee == gc_preserve_end_func || callee == typeof_func ||
    1559   [ +  +  +  +  :    1896520 :                         callee == pgcstack_getter || callee->getName() == XSTR(jl_egal__unboxed) ||
                   +  - ]
    1560         [ +  - ]:    1760130 :                         callee->getName() == XSTR(jl_lock_value) || callee->getName() == XSTR(jl_unlock_value) ||
    1561   [ +  +  +  +  :    3196260 :                         callee == write_barrier_func || callee == write_barrier_binding_func ||
             +  +  -  + ]
    1562         [ +  + ]:    2040010 :                         callee->getName() == "memcmp") {
    1563                 :     338552 :                         continue;
    1564                 :            :                     }
    1565         [ +  + ]:    1694270 :                     if (callee->hasFnAttribute(Attribute::ReadNone) ||
    1566   [ +  +  +  +  :    1694270 :                         callee->hasFnAttribute(Attribute::ReadOnly) ||
                   +  + ]
    1567                 :     843542 :                         callee->hasFnAttribute(Attribute::ArgMemOnly)) {
    1568                 :     104364 :                         continue;
    1569                 :            :                     }
    1570         [ -  + ]:     746364 :                     if (MemTransferInst *MI = dyn_cast<MemTransferInst>(CI)) {
    1571                 :          0 :                         MaybeTrackDst(S, MI);
    1572                 :            :                     }
    1573                 :            :                 }
    1574   [ +  -  +  - ]:    2474670 :                 if (isa<IntrinsicInst>(CI) || CI->hasFnAttr(Attribute::ArgMemOnly) ||
    1575   [ +  +  -  +  :    2474670 :                     CI->hasFnAttr(Attribute::ReadNone) || CI->hasFnAttr(Attribute::ReadOnly)) {
                   +  + ]
    1576                 :            :                     // Intrinsics are never safepoints.
    1577                 :       2872 :                     continue;
    1578                 :            :                 }
    1579                 :    1647860 :                 std::vector<int> CalleeRoots;
    1580         [ +  + ]:    2796800 :                 for (Use &U : CI->args()) {
    1581                 :            :                     // Find all callee rooted arguments.
    1582                 :            :                     // Record them instead of simply remove them from live values here
    1583                 :            :                     // since they can be useful during refinement
    1584                 :            :                     // (e.g. to remove roots of objects that are refined to these)
    1585                 :    1972870 :                     Value *V = U;
    1586   [ +  +  +  +  :    3124000 :                     if (isa<Constant>(V) || !isa<PointerType>(V->getType()) ||
             +  +  +  + ]
    1587                 :    1151130 :                         getValueAddrSpace(V) != AddressSpace::CalleeRooted)
    1588                 :    1862760 :                         continue;
    1589                 :     177866 :                     V = V->stripPointerCasts();
    1590         [ +  + ]:     177866 :                     if (!isTrackedValue(V))
    1591                 :      67762 :                         continue;
    1592                 :     110104 :                     auto Num = Number(S, V);
    1593         [ -  + ]:     110104 :                     if (Num < 0)
    1594                 :          0 :                         continue;
    1595                 :     110104 :                     CalleeRoots.push_back(Num);
    1596                 :            :                 }
    1597                 :     823932 :                 int SafepointNumber = NoteSafepoint(S, BBS, CI, std::move(CalleeRoots));
    1598                 :     823932 :                 BBS.HasSafepoint = true;
    1599                 :     823932 :                 BBS.TopmostSafepoint = SafepointNumber;
    1600                 :     823932 :                 BBS.Safepoints.push_back(SafepointNumber);
    1601         [ +  + ]:   19943000 :             } else if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
    1602                 :            :                 // If this is a load from an immutable, we know that
    1603                 :            :                 // this object will always be rooted as long as the
    1604                 :            :                 // object we're loading from is, so we can refine uses
    1605                 :            :                 // of this object to uses of the object we're loading
    1606                 :            :                 // from.
    1607                 :    7611190 :                 SmallVector<int, 1> RefinedPtr{};
    1608                 :    3805600 :                 Type *Ty = LI->getType()->getScalarType();
    1609                 :    3805600 :                 bool task_local = false;
    1610   [ +  +  +  +  :    3805600 :                 if (isLoadFromImmut(LI) && isSpecialPtr(LI->getPointerOperand()->getType())) {
                   +  + ]
    1611                 :     147836 :                     RefinedPtr.push_back(Number(S, LI->getPointerOperand()));
    1612         [ +  + ]:    6784140 :                 } else if (LI->getType()->isPointerTy() &&
    1613   [ +  +  +  +  :    6784140 :                         isSpecialPtr(Ty) &&
                   +  + ]
    1614                 :     801964 :                         LooksLikeFrameRef(LI->getPointerOperand())) {
    1615                 :            :                     // Loads from a jlcall argument array
    1616                 :     155811 :                     RefinedPtr.push_back(-1);
    1617                 :            :                 }
    1618         [ +  + ]:    3501950 :                 else if (isLoadFromConstGV(LI, task_local)) {
    1619                 :            :                     // If this is a const load from a global,
    1620                 :            :                     // we know that the object is a constant as well and doesn't need rooting.
    1621                 :            :                     // If this is a task local constant, we don't need to root it within the
    1622                 :            :                     // task but we do need to issue write barriers for when the current task dies.
    1623         [ -  + ]:    2307500 :                     RefinedPtr.push_back(task_local ? -1 : -2);
    1624                 :            :                 }
    1625   [ +  +  +  +  :    3805600 :                 if (!Ty->isPointerTy() || Ty->getPointerAddressSpace() != AddressSpace::Loaded) {
                   +  + ]
    1626                 :    3673090 :                     MaybeNoteDef(S, BBS, LI, BBS.Safepoints, std::move(RefinedPtr));
    1627                 :            :                 }
    1628                 :    3805600 :                 NoteOperandUses(S, BBS, I);
    1629         [ +  + ]:   16137400 :             } else if (auto *LI = dyn_cast<AtomicCmpXchgInst>(&I)) {
    1630                 :         44 :                 Type *Ty = LI->getNewValOperand()->getType()->getScalarType();
    1631   [ +  +  +  -  :         44 :                 if (!Ty->isPointerTy() || Ty->getPointerAddressSpace() != AddressSpace::Loaded) {
                   +  - ]
    1632                 :         44 :                     MaybeNoteDef(S, BBS, LI, BBS.Safepoints);
    1633                 :            :                 }
    1634                 :         44 :                 NoteOperandUses(S, BBS, I);
    1635                 :            :                 // TODO: do we need MaybeTrackStore(S, LI);
    1636         [ +  + ]:   16137400 :             } else if (auto *LI = dyn_cast<AtomicRMWInst>(&I)) {
    1637                 :       1002 :                 Type *Ty = LI->getType()->getScalarType();
    1638   [ -  +  -  -  :       1002 :                 if (!Ty->isPointerTy() || Ty->getPointerAddressSpace() != AddressSpace::Loaded) {
                   +  - ]
    1639                 :       1002 :                     MaybeNoteDef(S, BBS, LI, BBS.Safepoints);
    1640                 :            :                 }
    1641                 :       1002 :                 NoteOperandUses(S, BBS, I);
    1642                 :            :                 // TODO: do we need MaybeTrackStore(S, LI);
    1643         [ +  + ]:   16136400 :             } else if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
    1644                 :     415959 :                 auto tracked = CountTrackedPointers(SI->getType());
    1645   [ +  +  +  + ]:     415959 :                 if (tracked.count && !tracked.derived) {
    1646                 :            :                     // record the select definition of these values
    1647                 :      56856 :                     SmallVector<int, 2> RefinedPtr;
    1648         [ +  + ]:      28428 :                     if (isa<PointerType>(SI->getType())) {
    1649                 :            :                         // TODO: Refinements for vector select
    1650                 :      23649 :                         RefinedPtr = {
    1651                 :      23649 :                             Number(S, SI->getTrueValue()),
    1652                 :      23649 :                             Number(S, SI->getFalseValue())
    1653                 :      23649 :                         };
    1654                 :            :                     }
    1655                 :      28428 :                     MaybeNoteDef(S, BBS, SI, BBS.Safepoints, std::move(RefinedPtr));
    1656                 :      56856 :                     NoteOperandUses(S, BBS, I);
    1657         [ +  + ]:     387531 :                 } else if (tracked.count) {
    1658                 :            :                     // We need to insert extra selects for the GC roots
    1659                 :      13309 :                     LiftSelect(S, SI);
    1660                 :            :                 }
    1661         [ +  + ]:   15720400 :             } else if (PHINode *Phi = dyn_cast<PHINode>(&I)) {
    1662                 :     449942 :                 auto tracked = CountTrackedPointers(Phi->getType());
    1663   [ +  +  +  - ]:     449942 :                 if (tracked.count && !tracked.derived) {
    1664                 :            :                     // record the phi definition of these values
    1665                 :     270174 :                     SmallVector<int, 1> PHIRefinements;
    1666         [ +  + ]:     135087 :                     if (isa<PointerType>(Phi->getType()))
    1667                 :            :                         // TODO: Vector refinements
    1668                 :     119969 :                         PHIRefinements = GetPHIRefinements(Phi, S);
    1669                 :     135087 :                     MaybeNoteDef(S, BBS, Phi, BBS.Safepoints, std::move(PHIRefinements));
    1670         [ +  + ]:     135087 :                     if (isa<PointerType>(Phi->getType())) {
    1671                 :     119969 :                         PHINumbers.push_back(Number(S, Phi));
    1672                 :            :                     } else {
    1673                 :      30236 :                         std::vector<int> Nums = NumberAll(S, Phi);
    1674         [ +  + ]:      44748 :                         for (int Num : Nums)
    1675                 :      29630 :                             PHINumbers.push_back(Num);
    1676                 :            :                     }
    1677                 :     135087 :                     unsigned nIncoming = Phi->getNumIncomingValues();
    1678         [ +  + ]:     475253 :                     for (unsigned i = 0; i < nIncoming; ++i) {
    1679                 :     340166 :                         BBState &IncomingBBS = S.BBStates[Phi->getIncomingBlock(i)];
    1680                 :     340166 :                         NoteUse(S, IncomingBBS, Phi->getIncomingValue(i), IncomingBBS.PhiOuts);
    1681                 :     135087 :                     }
    1682         [ -  + ]:     314855 :                 } else if (tracked.count) {
    1683                 :            :                     // We need to insert extra phis for the GC roots
    1684                 :          0 :                     LiftPhi(S, Phi);
    1685                 :            :                 }
    1686         [ +  + ]:   15270500 :             } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
    1687                 :     554224 :                 NoteOperandUses(S, BBS, I);
    1688                 :     554224 :                 MaybeTrackStore(S, SI);
    1689         [ +  + ]:   14716300 :             } else if (isa<ReturnInst>(&I)) {
    1690                 :     124317 :                 NoteOperandUses(S, BBS, I);
    1691         [ +  + ]:   14592000 :             } else if (auto *ASCI = dyn_cast<AddrSpaceCastInst>(&I)) {
    1692         [ +  + ]:    1488560 :                 if (isTrackedValue(ASCI)) {
    1693                 :     809584 :                     SmallVector<int, 1> RefinedPtr{};
    1694                 :     404792 :                     bool task_local = false;
    1695                 :     404792 :                     auto origin = ASCI->getPointerOperand()->stripPointerCasts();
    1696         [ +  + ]:     404792 :                     if (auto LI = dyn_cast<LoadInst>(origin)) {
    1697         [ +  - ]:     393864 :                         if (isLoadFromConstGV(LI, task_local)) {
    1698         [ -  + ]:     393864 :                             RefinedPtr.push_back(task_local ? -1 : -2);
    1699                 :            :                         }
    1700                 :            :                     }
    1701                 :     404792 :                     MaybeNoteDef(S, BBS, ASCI, BBS.Safepoints, std::move(RefinedPtr));
    1702                 :            :                 }
    1703         [ +  + ]:   13103400 :             } else if (auto *AI = dyn_cast<AllocaInst>(&I)) {
    1704                 :     323915 :                 Type *ElT = AI->getAllocatedType();
    1705   [ +  +  +  +  :     323915 :                 if (AI->isStaticAlloca() && isa<PointerType>(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked) {
             +  +  +  + ]
    1706                 :      69288 :                     S.Allocas.push_back(AI);
    1707                 :            :                 }
    1708                 :            :             }
    1709                 :            :         }
    1710                 :            :         // Pre-seed the dataflow variables;
    1711                 :    1634660 :         BBS.LiveIn = BBS.UpExposedUses;
    1712                 :    1634660 :         BBS.Done = true;
    1713                 :            :     }
    1714                 :     131071 :     FixUpRefinements(PHINumbers, S);
    1715                 :     131071 :     return S;
    1716                 :            : }
    1717                 :            : 
    1718                 :      23756 : static Value *ExtractScalar(Value *V, Type *VTy, bool isptr, ArrayRef<unsigned> Idxs, IRBuilder<> &irbuilder) {
    1719                 :      23756 :     Type *T_int32 = Type::getInt32Ty(V->getContext());
    1720         [ +  + ]:      23756 :     if (isptr) {
    1721                 :       5304 :         std::vector<Value*> IdxList{Idxs.size() + 1};
    1722                 :       2652 :         IdxList[0] = ConstantInt::get(T_int32, 0);
    1723         [ +  + ]:       5619 :         for (unsigned j = 0; j < Idxs.size(); ++j) {
    1724                 :       2967 :             IdxList[j + 1] = ConstantInt::get(T_int32, Idxs[j]);
    1725                 :            :         }
    1726                 :       2652 :         Value *GEP = irbuilder.CreateInBoundsGEP(VTy, V, IdxList);
    1727                 :       2652 :         Type *T = GetElementPtrInst::getIndexedType(VTy, IdxList);
    1728         [ -  + ]:       2652 :         assert(T->isPointerTy());
    1729                 :       2652 :         V = irbuilder.CreateAlignedLoad(T, GEP, Align(sizeof(void*)));
    1730                 :            :         // since we're doing stack operations, it should be safe do this non-atomically
    1731                 :       2652 :         cast<LoadInst>(V)->setOrdering(AtomicOrdering::NotAtomic);
    1732                 :            :     }
    1733         [ -  + ]:      21104 :     else if (isa<PointerType>(V->getType())) {
    1734         [ #  # ]:          0 :         assert(Idxs.empty());
    1735                 :            :     }
    1736         [ +  - ]:      21104 :     else if (!Idxs.empty()) {
    1737                 :      21104 :         auto IdxsNotVec = Idxs.slice(0, Idxs.size() - 1);
    1738                 :      21104 :         Type *FinalT = ExtractValueInst::getIndexedType(V->getType(), IdxsNotVec);
    1739                 :      21104 :         bool IsVector = isa<VectorType>(FinalT);
    1740         [ +  - ]:      21104 :         if (Idxs.size() > IsVector)
    1741         [ -  + ]:      21104 :             V = irbuilder.Insert(ExtractValueInst::Create(V, IsVector ? IdxsNotVec : Idxs));
    1742         [ -  + ]:      21104 :         if (IsVector)
    1743                 :          0 :             V = irbuilder.Insert(ExtractElementInst::Create(V,
    1744                 :          0 :                     ConstantInt::get(Type::getInt32Ty(V->getContext()), Idxs.back())));
    1745                 :            :     }
    1746                 :      23756 :     return V;
    1747                 :            : }
    1748                 :            : 
    1749                 :       1814 : static unsigned getFieldOffset(const DataLayout &DL, Type *STy, ArrayRef<unsigned> Idxs)
    1750                 :            : {
    1751                 :       1814 :     SmallVector<Value*,4> IdxList{Idxs.size() + 1};
    1752                 :       1814 :     Type *T_int32 = Type::getInt32Ty(STy->getContext());
    1753                 :       1814 :     IdxList[0] = ConstantInt::get(T_int32, 0);
    1754         [ +  + ]:       3690 :     for (unsigned j = 0; j < Idxs.size(); ++j)
    1755                 :       1876 :         IdxList[j + 1] = ConstantInt::get(T_int32, Idxs[j]);
    1756                 :       1814 :     auto offset = DL.getIndexedOffsetInType(STy, IdxList);
    1757         [ -  + ]:       1814 :     assert(offset >= 0);
    1758                 :       1814 :     return (unsigned)offset;
    1759                 :            : }
    1760                 :            : 
    1761                 :      12922 : std::vector<Value*> ExtractTrackedValues(Value *Src, Type *STy, bool isptr, IRBuilder<> &irbuilder, ArrayRef<unsigned> perm_offsets) {
    1762                 :      25844 :     auto Tracked = TrackCompositeType(STy);
    1763                 :      12922 :     std::vector<Value*> Ptrs;
    1764                 :      12922 :     unsigned perm_idx = 0;
    1765                 :      25478 :     auto ignore_field = [&] (ArrayRef<unsigned> Idxs) {
    1766         [ +  + ]:      25478 :         if (perm_idx >= perm_offsets.size())
    1767                 :      23664 :             return false;
    1768                 :            :         // Assume the indices returned from `TrackCompositeType` is ordered and do a
    1769                 :            :         // single pass over `perm_offsets`.
    1770         [ -  + ]:       1814 :         assert(!isptr);
    1771                 :       1814 :         auto offset = getFieldOffset(irbuilder.GetInsertBlock()->getModule()->getDataLayout(),
    1772                 :       1814 :                                      STy, Idxs);
    1773                 :          0 :         do {
    1774                 :       1814 :             auto perm_offset = perm_offsets[perm_idx];
    1775         [ +  + ]:       1814 :             if (perm_offset > offset)
    1776                 :         92 :                 return false;
    1777                 :       1722 :             perm_idx++;
    1778         [ +  - ]:       1722 :             if (perm_offset == offset) {
    1779                 :       1722 :                 return true;
    1780                 :            :             }
    1781         [ #  # ]:          0 :         } while (perm_idx < perm_offsets.size());
    1782                 :          0 :         return false;
    1783                 :      12922 :     };
    1784         [ +  + ]:      38400 :     for (unsigned i = 0; i < Tracked.size(); ++i) {
    1785                 :      25478 :         auto Idxs = makeArrayRef(Tracked[i]);
    1786         [ +  + ]:      25478 :         if (ignore_field(Idxs))
    1787                 :       1722 :             continue;
    1788                 :      23756 :         Value *Elem = ExtractScalar(Src, STy, isptr, Idxs, irbuilder);
    1789                 :      23756 :         Ptrs.push_back(Elem);
    1790                 :            :     }
    1791                 :      12922 :     return Ptrs;
    1792                 :            : }
    1793                 :            : 
    1794                 :       5970 : unsigned TrackWithShadow(Value *Src, Type *STy, bool isptr, Value *Dst, Type *DTy, IRBuilder<> &irbuilder) {
    1795                 :       5970 :     auto Ptrs = ExtractTrackedValues(Src, STy, isptr, irbuilder);
    1796         [ +  + ]:      19189 :     for (unsigned i = 0; i < Ptrs.size(); ++i) {
    1797                 :      13219 :         Value *Elem = Ptrs[i];// Dst has type `[n x {}*]*`
    1798                 :      13219 :         Value *Slot = irbuilder.CreateConstInBoundsGEP2_32(DTy, Dst, 0, i);
    1799         [ -  + ]:      13219 :         assert(cast<PointerType>(Dst->getType())->isOpaqueOrPointeeTypeMatches(DTy));
    1800                 :      13219 :         StoreInst *shadowStore = irbuilder.CreateAlignedStore(Elem, Slot, Align(sizeof(void*)));
    1801                 :      13219 :         shadowStore->setOrdering(AtomicOrdering::NotAtomic);
    1802                 :            :         // TODO: shadowStore->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
    1803                 :            :     }
    1804                 :       5970 :     return Ptrs.size();
    1805                 :            : }
    1806                 :            : 
    1807                 :            : 
    1808                 :            : // turn a memcpy into a set of loads
    1809                 :          0 : void LateLowerGCFrame::MaybeTrackDst(State &S, MemTransferInst *MI) {
    1810                 :            :     //Value *Dst = MI->getRawDest()->stripInBoundsOffsets();
    1811                 :            :     //if (AllocaInst *AI = dyn_cast<AllocaInst>(Dst)) {
    1812                 :            :     //    Type *STy = AI->getAllocatedType();
    1813                 :            :     //    if (!AI->isStaticAlloca() || (isa<PointerType>(STy) && STy->getPointerAddressSpace() == AddressSpace::Tracked) || S.ArrayAllocas.count(AI))
    1814                 :            :     //        return; // already numbered this
    1815                 :            :     //    auto tracked = CountTrackedPointers(STy);
    1816                 :            :     //    unsigned nroots = tracked.count * cast<ConstantInt>(AI->getArraySize())->getZExtValue();
    1817                 :            :     //    if (nroots) {
    1818                 :            :     //        assert(!tracked.derived);
    1819                 :            :     //        if (!tracked.all) {
    1820                 :            :     //            // materialize shadow LoadInst and StoreInst ops to make a copy of just the tracked values inside
    1821                 :            :     //            //assert(MI->getLength() == DL.getTypeAllocSize(AI->getAllocatedType()) && !AI->isArrayAllocation()); // XXX: handle partial copy
    1822                 :            :     //            Value *Src = MI->getSource();
    1823                 :            :     //            Src = new BitCastInst(Src, STy->getPointerTo(MI->getSourceAddressSpace()), "", MI);
    1824                 :            :     //            auto &Shadow = S.ShadowAllocas[AI];
    1825                 :            :     //            if (!Shadow)
    1826                 :            :     //                Shadow = new AllocaInst(ArrayType::get(T_prjlvalue, nroots), 0, "", MI);
    1827                 :            :     //            AI = Shadow;
    1828                 :            :     //            unsigned count = TrackWithShadow(Src, STy, true, AI, IRBuilder<>(MI));
    1829                 :            :     //            assert(count == tracked.count); (void)count;
    1830                 :            :     //        }
    1831                 :            :     //        S.ArrayAllocas[AI] = nroots;
    1832                 :            :     //    }
    1833                 :            :     //}
    1834                 :            :     //// TODO: else???
    1835                 :          0 : }
    1836                 :            : 
    1837                 :     554224 : void LateLowerGCFrame::MaybeTrackStore(State &S, StoreInst *I) {
    1838                 :     554224 :     Value *PtrBase = I->getPointerOperand()->stripInBoundsOffsets();
    1839                 :     554224 :     auto tracked = CountTrackedPointers(I->getValueOperand()->getType());
    1840         [ +  + ]:     554224 :     if (!tracked.count)
    1841                 :     547354 :         return; // nothing to track is being stored
    1842         [ +  + ]:     278761 :     if (AllocaInst *AI = dyn_cast<AllocaInst>(PtrBase)) {
    1843                 :     161685 :         Type *STy = AI->getAllocatedType();
    1844   [ +  +  +  +  :     161685 :         if (!AI->isStaticAlloca() || (isa<PointerType>(STy) && STy->getPointerAddressSpace() == AddressSpace::Tracked) || S.ArrayAllocas.count(AI))
          -  +  +  +  +  
                      + ]
    1845                 :     154815 :             return; // already numbered this
    1846                 :      16313 :         auto tracked = CountTrackedPointers(STy);
    1847         [ +  - ]:      16313 :         if (tracked.count) {
    1848         [ -  + ]:      16313 :             assert(!tracked.derived);
    1849         [ +  + ]:      16313 :             if (tracked.all) {
    1850                 :            :                 // track the Alloca directly
    1851                 :       9443 :                 S.ArrayAllocas[AI] = tracked.count * cast<ConstantInt>(AI->getArraySize())->getZExtValue();
    1852                 :       9443 :                 return;
    1853                 :            :             }
    1854                 :            :         }
    1855                 :            :     }
    1856                 :            :     else {
    1857                 :     117076 :         return; // assume it is rooted--TODO: should we be more conservative?
    1858                 :            :     }
    1859                 :            :     // track the Store with a Shadow
    1860                 :            :     //auto &Shadow = S.ShadowAllocas[AI];
    1861                 :            :     //if (!Shadow)
    1862                 :            :     //    Shadow = new AllocaInst(ArrayType::get(T_prjlvalue, tracked.count), 0, "", MI);
    1863                 :            :     //AI = Shadow;
    1864                 :            :     //Value *Src = I->getValueOperand();
    1865                 :            :     //unsigned count = TrackWithShadow(Src, Src->getType(), false, AI, MI, TODO which slots are we actually clobbering?);
    1866                 :            :     //assert(count == tracked.count); (void)count;
    1867                 :       6870 :     S.TrackedStores.push_back(std::make_pair(I, tracked.count));
    1868                 :            : }
    1869                 :            : 
    1870                 :            : /*
    1871                 :            :  * DataFlow equations:
    1872                 :            :  * LiveIn[BB] = UpExposedUses[BB] ∪ (LiveOut[BB] - Defs[BB])
    1873                 :            :  * LiveOut[BB] =  PhiUses[BB] ∪ ∪_{Succ} LiveIn[Succ]
    1874                 :            :  *
    1875                 :            :  * We'll perform textbook iterative dataflow to compute this. There are better
    1876                 :            :  * algorithms. If this starts becoming a problem, we should use one of them.
    1877                 :            :  */
    1878                 :     131071 : void LateLowerGCFrame::ComputeLiveness(State &S) {
    1879                 :     131071 :     bool Converged = false;
    1880                 :            :     /* Liveness is a reverse problem, so RPOT is a good way to
    1881                 :            :      * perform this iteration.
    1882                 :            :      */
    1883                 :     262142 :     ReversePostOrderTraversal<Function *> RPOT(S.F);
    1884                 :     262142 :     BitVector NewLive;
    1885         [ +  + ]:     664373 :     while (!Converged) {
    1886                 :     533302 :         bool AnyChanged = false;
    1887         [ +  + ]:   33515300 :         for (BasicBlock *BB : RPOT) {
    1888                 :            :             // This could all be done more efficiently, by only updating what
    1889                 :            :             // changed - Let's get it working first though.
    1890                 :   32982000 :             BBState &BBS = S.BBStates[BB];
    1891                 :   32982000 :             NewLive = BBS.PhiOuts;
    1892         [ +  + ]:   77134700 :             for (BasicBlock *Succ : successors(BB)) {
    1893                 :   44152700 :                 NewLive |= S.BBStates[Succ].LiveIn;
    1894                 :            :             }
    1895         [ +  + ]:   32982000 :             if (NewLive != BBS.LiveOut) {
    1896                 :    6584770 :                 AnyChanged = true;
    1897                 :    6584770 :                 BBS.LiveOut = NewLive;
    1898                 :    6584770 :                 MaybeResize(BBS, BBS.LiveOut.size() - 1);
    1899                 :            :             }
    1900                 :   32982000 :             NewLive.reset(BBS.Defs);
    1901                 :   32982000 :             NewLive |= BBS.UpExposedUses;
    1902         [ +  + ]:   32982000 :             if (NewLive != BBS.LiveIn) {
    1903                 :    5948900 :                 AnyChanged = true;
    1904                 :    5948900 :                 std::swap(BBS.LiveIn, NewLive);
    1905                 :            :             }
    1906                 :            :         }
    1907                 :     533302 :         Converged = !AnyChanged;
    1908                 :            :     }
    1909                 :     131071 :     ComputeLiveSets(S);
    1910                 :     131071 : }
    1911                 :            : 
    1912                 :            : // For debugging
    1913                 :          0 : JL_USED_FUNC static void dumpSafepointsForBBName(Function &F, State &S, const char *BBName) {
    1914         [ #  # ]:          0 :     for (auto it : S.SafepointNumbering) {
    1915         [ #  # ]:          0 :         if (it.first->getParent()->getName() == BBName) {
    1916                 :          0 :             dbgs() << "Live at " << *it.first << "\n";
    1917                 :          0 :             BitVector &LS = S.LiveSets[it.second];
    1918         [ #  # ]:          0 :             for (int Idx = LS.find_first(); Idx >= 0; Idx = LS.find_next(Idx)) {
    1919                 :          0 :                 dbgs() << "\t";
    1920                 :          0 :                 S.ReversePtrNumbering[Idx]->printAsOperand(dbgs());
    1921                 :          0 :                 dbgs() << "\n";
    1922                 :            :             }
    1923                 :            :         }
    1924                 :            :     }
    1925                 :          0 : }
    1926                 :            : 
    1927                 :     823932 : void LateLowerGCFrame::RefineLiveSet(BitVector &LS, State &S, const std::vector<int> &CalleeRoots)
    1928                 :            : {
    1929                 :    1647860 :     BitVector FullLS(S.MaxPtrNumber + 1, false);
    1930                 :     823932 :     FullLS |= LS;
    1931                 :            :     // First expand the live set according to the refinement map
    1932                 :            :     // so that we can see all the values that are effectively live.
    1933         [ +  + ]:     934036 :     for (auto Num: CalleeRoots) {
    1934                 :            :         // For callee rooted values, they are all kept alive at the safepoint.
    1935                 :            :         // Make sure they are marked (even though they probably are already)
    1936                 :            :         // so that other values can be refined to them.
    1937                 :     110104 :         FullLS[Num] = 1;
    1938                 :            :     }
    1939                 :            :     bool changed;
    1940         [ +  + ]:    1549290 :     do {
    1941                 :    1549290 :         changed = false;
    1942         [ +  + ]:  138688000 :         for (auto &kv: S.Refinements) {
    1943                 :  137139000 :             int Num = kv.first;
    1944   [ +  -  +  +  :  137139000 :             if (Num < 0 || HasBitSet(FullLS, Num) || kv.second.empty())
             +  +  +  + ]
    1945                 :   73024200 :                 continue;
    1946                 :   64114600 :             bool live = true;
    1947         [ +  + ]:  121867000 :             for (auto &refine: kv.second) {
    1948   [ +  +  +  +  :   64700100 :                 if (refine < 0 || HasBitSet(FullLS, refine))
                   +  + ]
    1949                 :   57752800 :                     continue;
    1950                 :    6947250 :                 live = false;
    1951                 :    6947250 :                 break;
    1952                 :            :             }
    1953         [ +  + ]:   64114600 :             if (live) {
    1954                 :   57167300 :                 changed = true;
    1955                 :   57167300 :                 FullLS[Num] = 1;
    1956                 :            :             }
    1957                 :            :         }
    1958                 :            :     } while (changed);
    1959                 :            :     // Now remove all values from the LiveSet that's kept alive by other objects
    1960                 :            :     // This loop only mutate `LS` which isn't read from in the loop body so
    1961                 :            :     // a single pass is enough.
    1962         [ +  + ]:    2566300 :     for (int Idx = LS.find_first(); Idx >= 0; Idx = LS.find_next(Idx)) {
    1963         [ +  + ]:    1742370 :         if (!S.Refinements.count(Idx))
    1964                 :     914603 :             continue;
    1965                 :     827767 :         const auto &RefinedPtr = S.Refinements[Idx];
    1966         [ +  + ]:     827767 :         if (RefinedPtr.empty())
    1967                 :     244238 :             continue;
    1968                 :     583529 :         bool rooted = true;
    1969         [ +  + ]:    1147510 :         for (auto RefPtr: RefinedPtr) {
    1970   [ +  +  +  +  :     587489 :             if (RefPtr < 0 || HasBitSet(FullLS, RefPtr))
                   +  + ]
    1971                 :     563981 :                 continue;
    1972                 :      23508 :             rooted = false;
    1973                 :      23508 :             break;
    1974                 :            :         }
    1975         [ +  + ]:     583529 :         if (rooted) {
    1976                 :     560021 :             LS[Idx] = 0;
    1977                 :            :         }
    1978                 :            :     }
    1979         [ +  + ]:     934036 :     for (auto Num: CalleeRoots) {
    1980                 :            :         // Now unmark all values that are rooted by the callee after
    1981                 :            :         // refining other values to them.
    1982                 :     110104 :         LS[Num] = 0;
    1983                 :            :     }
    1984                 :     823932 : }
    1985                 :            : 
    1986                 :     131071 : void LateLowerGCFrame::ComputeLiveSets(State &S) {
    1987                 :            :     // Iterate over all safe points. Add to live sets all those variables that
    1988                 :            :     // are now live across their parent block.
    1989         [ +  + ]:     955003 :     for (auto it : S.SafepointNumbering) {
    1990                 :     823932 :         int idx = it.second;
    1991                 :     823932 :         Instruction *Safepoint = it.first;
    1992                 :     823932 :         BasicBlock *BB = Safepoint->getParent();
    1993                 :     823932 :         BBState &BBS = S.BBStates[BB];
    1994                 :    1647860 :         BitVector LiveAcross = BBS.LiveIn;
    1995                 :     823932 :         LiveAcross &= BBS.LiveOut;
    1996                 :     823932 :         BitVector &LS = S.LiveSets[idx];
    1997                 :     823932 :         LS |= LiveAcross;
    1998         [ +  + ]:    2307360 :         for (int Live : S.LiveIfLiveOut[idx]) {
    1999         [ +  + ]:    1483430 :             if (HasBitSet(BBS.LiveOut, Live))
    2000                 :     100087 :                 LS[Live] = 1;
    2001                 :            :         }
    2002                 :     823932 :         RefineLiveSet(LS, S, S.CalleeRoots[idx]);
    2003                 :            :         // If the function has GC preserves, figure out whether we need to
    2004                 :            :         // add in any extra live values.
    2005         [ +  + ]:     823932 :         if (!S.GCPreserves.empty()) {
    2006         [ +  + ]:     159082 :             if (!S.DT) {
    2007                 :       2648 :                 S.DT = &GetDT();
    2008                 :            :             }
    2009         [ +  + ]:    1202580 :             for (auto it2 : S.GCPreserves) {
    2010         [ +  + ]:    1043500 :                 if (!S.DT->dominates(it2.first, Safepoint))
    2011                 :    1011250 :                     continue;
    2012                 :      32250 :                 bool OutsideRange = false;
    2013         [ +  + ]:      42676 :                 for (const User *U : it2.first->users()) {
    2014                 :            :                     // If this is dominated by an end, we don't need to add
    2015                 :            :                     // the values to our live set.
    2016         [ +  + ]:      31622 :                     if (S.DT->dominates(cast<Instruction>(U), Safepoint)) {
    2017                 :      21196 :                         OutsideRange = true;
    2018                 :      21196 :                         break;
    2019                 :            :                     }
    2020                 :            :                 }
    2021         [ +  + ]:      32250 :                 if (OutsideRange)
    2022                 :      21196 :                     continue;
    2023         [ +  + ]:      23046 :                 for (unsigned Num : it2.second) {
    2024         [ +  + ]:      11992 :                     if (Num >= LS.size())
    2025                 :       1727 :                         LS.resize(Num + 1);
    2026                 :      11992 :                     LS[Num] = 1;
    2027                 :            :                 }
    2028                 :            :             }
    2029                 :            :         }
    2030                 :            :     }
    2031                 :            :     // Compute the interference graph
    2032         [ +  + ]:    2077830 :     for (int i = 0; i <= S.MaxPtrNumber; ++i) {
    2033                 :    3893520 :         SetVector<int> Neighbors;
    2034                 :    3893520 :         BitVector NeighborBits(S.MaxPtrNumber);
    2035         [ +  + ]:  135683000 :         for (auto it : S.SafepointNumbering) {
    2036                 :  133736000 :             const BitVector &LS = S.LiveSets[it.second];
    2037   [ +  +  +  +  :  133736000 :             if ((unsigned)i >= LS.size() || !LS[i])
                   +  + ]
    2038                 :  132646000 :                 continue;
    2039                 :    1089860 :             NeighborBits |= LS;
    2040                 :            :         }
    2041         [ +  + ]:    3780710 :         for (int Idx = NeighborBits.find_first(); Idx >= 0; Idx = NeighborBits.find_next(Idx)) {
    2042                 :            :             // We explicitly let i be a neighbor of itself, to distinguish
    2043                 :            :             // between being the only value live at a safepoint, vs not
    2044                 :            :             // being live at any safepoint.
    2045                 :    1833950 :             Neighbors.insert(Idx);
    2046                 :            :         }
    2047                 :    1946760 :         S.Neighbors.push_back(Neighbors);
    2048                 :            :     }
    2049                 :     131071 : }
    2050                 :            : 
    2051                 :            : /* For chordal interference graphs, this class gives the vertices in a (reverse
    2052                 :            :  * - depending on definition) perfect elimination ordering, in such a way that
    2053                 :            :  * greedy coloring gives an optimal coloring. Since our roots are in SSA form,
    2054                 :            :  * the interference should be chordal.
    2055                 :            :  */
    2056                 :            : struct PEOIterator {
    2057                 :            :     struct Element {
    2058                 :            :         unsigned weight;
    2059                 :            :         unsigned pos;
    2060                 :            :     };
    2061                 :            :     std::vector<Element> Elements;
    2062                 :            :     std::vector<std::vector<int>> Levels;
    2063                 :            :     const std::vector<SetVector<int>> &Neighbors;
    2064                 :     131071 :     PEOIterator(const std::vector<SetVector<int>> &Neighbors) : Neighbors(Neighbors) {
    2065                 :            :         // Initialize State
    2066                 :     262142 :         std::vector<int> FirstLevel;
    2067         [ +  + ]:    2077830 :         for (unsigned i = 0; i < Neighbors.size(); ++i) {
    2068                 :    1946760 :             FirstLevel.push_back(i);
    2069                 :    1946760 :             Element E{0, i};
    2070                 :    1946760 :             Elements.push_back(E);
    2071                 :            :         }
    2072                 :     131071 :         Levels.push_back(FirstLevel);
    2073                 :     131071 :     }
    2074                 :    2077830 :     int next() {
    2075                 :            :         // Find the element in the highest bucket
    2076                 :    2077830 :         int NextElement = -1;
    2077   [ +  +  +  +  :    4113490 :         while (NextElement == -1 && !Levels.empty()) {
                   +  + ]
    2078                 :    2035660 :             std::vector<int> &LastLevel = Levels.back();
    2079   [ +  +  +  +  :    4708330 :             while (NextElement == -1 && !LastLevel.empty()) {
                   +  + ]
    2080                 :    2672670 :                 NextElement = LastLevel.back();
    2081                 :    2672670 :                 LastLevel.pop_back();
    2082                 :            :             }
    2083         [ +  + ]:    2035660 :             if (LastLevel.empty())
    2084                 :     300906 :                 Levels.pop_back();
    2085                 :            :         }
    2086         [ +  + ]:    2077830 :         if (NextElement == -1)
    2087                 :     131071 :             return NextElement;
    2088                 :            :         // Make sure not to try to re-use this later.
    2089                 :    1946760 :         Elements[NextElement].weight = (unsigned)-1;
    2090                 :            :         // Raise neighbors
    2091         [ +  + ]:    3780710 :         for (int Neighbor : Neighbors[NextElement]) {
    2092         [ +  + ]:    1833950 :             if (Neighbor == NextElement)
    2093                 :     382127 :                 continue;
    2094                 :    1451820 :             Element &NElement = Elements[Neighbor];
    2095                 :            :             // Already processed. Don't re-enqueue
    2096         [ +  + ]:    1451820 :             if (NElement.weight == (unsigned)-1)
    2097                 :     725910 :                 continue;
    2098                 :            :             // Kill old queue position
    2099                 :     725910 :             Levels[NElement.weight][NElement.pos] = -1;
    2100                 :            :             // Raise the neighbor to the next level.
    2101                 :     725910 :             NElement.weight += 1;
    2102         [ +  + ]:     725910 :             if (NElement.weight >= Levels.size())
    2103                 :     169835 :                 Levels.push_back(std::vector<int>{});
    2104                 :     725910 :             Levels[NElement.weight].push_back(Neighbor);
    2105                 :     725910 :             NElement.pos = Levels[NElement.weight].size()-1;
    2106                 :            :         }
    2107                 :            :         // As an enhancement, we might want to periodically compactify the whole
    2108                 :            :         // data structure. This could be done here.
    2109                 :    1946760 :         return NextElement;
    2110                 :            :     }
    2111                 :            : };
    2112                 :            : 
    2113                 :          0 : JL_USED_FUNC static void dumpColorAssignments(const State &S, std::vector<int> &Colors)
    2114                 :            : {
    2115         [ #  # ]:          0 :     for (unsigned i = 0; i < Colors.size(); ++i) {
    2116         [ #  # ]:          0 :         if (Colors[i] == -1)
    2117                 :          0 :             continue;
    2118                 :          0 :         dbgs() << "\tValue ";
    2119                 :          0 :         S.ReversePtrNumbering.at(i)->printAsOperand(dbgs());
    2120                 :          0 :         dbgs() << " assigned color " << Colors[i] << "\n";
    2121                 :            :     }
    2122                 :          0 : }
    2123                 :            : 
    2124                 :     131071 : std::vector<int> LateLowerGCFrame::ColorRoots(const State &S) {
    2125                 :     131071 :     std::vector<int> Colors;
    2126                 :     131071 :     Colors.resize(S.MaxPtrNumber + 1, -1);
    2127                 :     262142 :     PEOIterator Ordering(S.Neighbors);
    2128                 :     131071 :     int PreAssignedColors = 0;
    2129                 :            :     /* First assign permanent slots to things that need them due
    2130                 :            :        to returns_twice */
    2131         [ +  + ]:     135909 :     for (auto it : S.ReturnsTwice) {
    2132                 :       4838 :         int Num = S.SafepointNumbering.at(it);
    2133                 :       4838 :         const BitVector &LS = S.LiveSets[Num];
    2134         [ +  + ]:      11028 :         for (int Idx = LS.find_first(); Idx >= 0; Idx = LS.find_next(Idx)) {
    2135         [ +  + ]:       6190 :             if (Colors[Idx] == -1)
    2136                 :       5556 :                 Colors[Idx] = PreAssignedColors++;
    2137                 :            :         }
    2138                 :            :     }
    2139                 :            :     /* Greedy coloring */
    2140                 :     131071 :     int MaxAssignedColor = -1;
    2141                 :     131071 :     int ActiveElement = 1;
    2142                 :     262142 :     BitVector UsedColors;
    2143         [ +  + ]:    2077830 :     while ((ActiveElement = Ordering.next()) != -1) {
    2144         [ +  + ]:    1946760 :         if (Colors[ActiveElement] != -1)
    2145                 :       5556 :             continue;
    2146                 :    1941200 :         UsedColors.resize(MaxAssignedColor + 2, false);
    2147                 :    1941200 :         UsedColors.reset();
    2148         [ +  + ]:    1941200 :         if (S.Neighbors[ActiveElement].empty()) {
    2149                 :            :             // No need to color a value not live at any safe point
    2150                 :    1564630 :             continue;
    2151                 :            :         }
    2152         [ +  + ]:    2144360 :         for (int Neighbor : S.Neighbors[ActiveElement]) {
    2153                 :    1767790 :             int NeighborColor = Colors[Neighbor];
    2154         [ +  + ]:    1767790 :             if (NeighborColor == -1)
    2155                 :    1052540 :                 continue;
    2156         [ +  + ]:     715251 :             if (NeighborColor < PreAssignedColors)
    2157                 :      39287 :                 continue;
    2158                 :     675964 :             UsedColors[NeighborColor - PreAssignedColors] = 1;
    2159                 :            :         }
    2160                 :     376571 :         int NewColor = UsedColors.flip().find_first();
    2161         [ +  + ]:     376571 :         if (NewColor > MaxAssignedColor)
    2162                 :     124688 :             MaxAssignedColor = NewColor;
    2163                 :     376571 :         NewColor += PreAssignedColors;
    2164                 :     376571 :         Colors[ActiveElement] = NewColor;
    2165                 :            :     }
    2166                 :     131071 :     return Colors;
    2167                 :            : }
    2168                 :            : 
    2169                 :            : // Size of T is assumed to be `sizeof(void*)`
    2170                 :     248892 : Value *LateLowerGCFrame::EmitTagPtr(IRBuilder<> &builder, Type *T, Value *V)
    2171                 :            : {
    2172                 :     248892 :     auto T_size = getSizeTy(T->getContext());
    2173   [ +  +  -  + ]:     248892 :     assert(T == T_size || isa<PointerType>(T));
    2174                 :     248892 :     auto TV = cast<PointerType>(V->getType());
    2175                 :     248892 :     auto cast = builder.CreateBitCast(V, T->getPointerTo(TV->getAddressSpace()));
    2176                 :     248892 :     return builder.CreateInBoundsGEP(T, cast, ConstantInt::get(T_size, -1));
    2177                 :            : }
    2178                 :            : 
    2179                 :     184895 : Value *LateLowerGCFrame::EmitLoadTag(IRBuilder<> &builder, Value *V)
    2180                 :            : {
    2181                 :     184895 :     auto T_size = getSizeTy(builder.getContext());
    2182                 :     184895 :     auto addr = EmitTagPtr(builder, T_size, V);
    2183                 :     184895 :     LoadInst *load = builder.CreateAlignedLoad(T_size, addr, Align(sizeof(size_t)));
    2184                 :     184895 :     load->setOrdering(AtomicOrdering::Unordered);
    2185                 :     184895 :     load->setMetadata(LLVMContext::MD_tbaa, tbaa_tag);
    2186                 :     184895 :     MDBuilder MDB(load->getContext());
    2187                 :     184895 :     auto *NullInt = ConstantInt::get(T_size, 0);
    2188                 :            :     // We can be sure that the tag is larger than page size.
    2189                 :            :     // Hopefully this is enough to convince LLVM that the value is still not NULL
    2190                 :            :     // after masking off the tag bits
    2191                 :     184895 :     auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(T_size, 4096));
    2192                 :     184895 :     load->setMetadata(LLVMContext::MD_range, MDB.createRange(NonNullInt, NullInt));
    2193                 :     184895 :     return load;
    2194                 :            : }
    2195                 :            : 
    2196                 :            : // Enable this optimization only on LLVM 4.0+ since this cause LLVM to optimize
    2197                 :            : // constant store loop to produce a `memset_pattern16` with a global variable
    2198                 :            : // that's initialized by `addrspacecast`. Such a global variable is not supported by the backend.
    2199                 :            : // This is not a problem on 4.0+ since that transformation (in loop-idiom) is disabled
    2200                 :            : // for NI pointers.
    2201                 :      28807 : static SmallVector<int, 1> *FindRefinements(Value *V, State *S)
    2202                 :            : {
    2203         [ -  + ]:      28807 :     if (!S)
    2204                 :          0 :         return nullptr;
    2205                 :      28807 :     auto it = S->AllPtrNumbering.find(V);
    2206         [ -  + ]:      28807 :     if (it == S->AllPtrNumbering.end())
    2207                 :          0 :         return nullptr;
    2208                 :      28807 :     auto rit = S->Refinements.find(it->second);
    2209   [ +  +  +  + ]:      28807 :     return rit != S->Refinements.end() && !rit->second.empty() ? &rit->second : nullptr;
    2210                 :            : }
    2211                 :            : 
    2212                 :      31290 : static bool IsPermRooted(Value *V, State *S)
    2213                 :            : {
    2214         [ +  + ]:      31290 :     if (isa<Constant>(V))
    2215                 :       2483 :         return true;
    2216         [ +  + ]:      28807 :     if (auto *RefinePtr = FindRefinements(V, S))
    2217   [ +  +  +  + ]:       4021 :         return RefinePtr->size() == 1 && (*RefinePtr)[0] == -2;
    2218                 :      24786 :     return false;
    2219                 :            : }
    2220                 :            : 
    2221                 :    2194130 : static inline void UpdatePtrNumbering(Value *From, Value *To, State *S)
    2222                 :            : {
    2223         [ +  + ]:    2194130 :     if (!S)
    2224                 :    1711630 :         return;
    2225                 :    2180820 :     auto it = S->AllPtrNumbering.find(From);
    2226         [ +  + ]:    2180820 :     if (it == S->AllPtrNumbering.end())
    2227                 :    1698310 :         return;
    2228                 :     482504 :     auto Num = it->second;
    2229                 :     482504 :     S->AllPtrNumbering.erase(it);
    2230         [ +  - ]:     482504 :     if (To) {
    2231                 :     482504 :         S->AllPtrNumbering[To] = Num;
    2232                 :            :     }
    2233                 :            : }
    2234                 :            : 
    2235                 :    2539240 : MDNode *createMutableTBAAAccessTag(MDNode *Tag) {
    2236                 :    2539240 :     return MDBuilder(Tag->getContext()).createMutableTBAAAccessTag(Tag);
    2237                 :            : }
    2238                 :            : 
    2239                 :            : 
    2240                 :     137811 : bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
    2241                 :     137811 :     auto T_int32 = Type::getInt32Ty(F.getContext());
    2242                 :     137811 :     auto T_size = getSizeTy(F.getContext());
    2243                 :     137811 :     bool ChangesMade = false;
    2244                 :            :     // We create one alloca for all the jlcall frames that haven't been processed
    2245                 :            :     // yet. LLVM would merge them anyway later, so might as well save it a bit
    2246                 :            :     // of work
    2247                 :     137811 :     size_t maxframeargs = 0;
    2248                 :     137811 :     Instruction *StartOff = &*(F.getEntryBlock().begin());
    2249                 :     137811 :     PointerType *T_pprjlvalue = nullptr;
    2250                 :     137811 :     AllocaInst *Frame = nullptr;
    2251         [ +  - ]:     137811 :     if (T_prjlvalue) {
    2252                 :     137811 :         T_pprjlvalue = T_prjlvalue->getPointerTo();
    2253                 :     275622 :         Frame = new AllocaInst(T_prjlvalue, 0,
    2254                 :     275622 :             ConstantInt::get(T_int32, maxframeargs), "", StartOff);
    2255                 :            :     }
    2256                 :     137811 :     std::vector<CallInst*> write_barriers;
    2257         [ +  + ]:    1792360 :     for (BasicBlock &BB : F) {
    2258         [ +  + ]:   24477300 :         for (auto it = BB.begin(); it != BB.end();) {
    2259                 :   22822800 :             Instruction *I = &*it;
    2260                 :            :             // strip all constant alias information, as it might depend on the gc having
    2261                 :            :             // preserved a gc root, which stops being true after this pass (#32215)
    2262                 :            :             // similar to RewriteStatepointsForGC::stripNonValidData, but less aggressive
    2263         [ +  + ]:   22822800 :             if (I->getMetadata(LLVMContext::MD_invariant_load))
    2264                 :    2539240 :                 I->setMetadata(LLVMContext::MD_invariant_load, NULL);
    2265         [ +  + ]:   22822800 :             if (MDNode *TBAA = I->getMetadata(LLVMContext::MD_tbaa)) {
    2266   [ +  +  +  -  :    3670510 :                 if (TBAA->getNumOperands() == 4 && isTBAA(TBAA, {"jtbaa_const"})) {
                   +  + ]
    2267                 :    2539240 :                     MDNode *MutableTBAA = createMutableTBAAAccessTag(TBAA);
    2268         [ +  - ]:    2539240 :                     if (MutableTBAA != TBAA)
    2269                 :    2539240 :                         I->setMetadata(LLVMContext::MD_tbaa, MutableTBAA);
    2270                 :            :                 }
    2271                 :            :             }
    2272                 :            :             // FCA chains created by SROA start with an undef value
    2273                 :            :             // if the type contains an tracked pointer that can lead to a partial
    2274                 :            :             // initialisation and LateLower might have inserted an extractvalue
    2275                 :            :             // of an undef field. Fix this by changing it to start with an zero-init
    2276         [ +  + ]:   22822800 :             if (auto *IV = dyn_cast<InsertValueInst>(*&it)) {
    2277                 :     100108 :                 Value *SourceAggregate = IV->getAggregateOperand();
    2278         [ +  + ]:     100108 :                 if (isa<UndefValue>(SourceAggregate)) {
    2279                 :       1870 :                     IV->setOperand(IV->getAggregateOperandIndex(), ConstantAggregateZero::get(IV->getType()));
    2280                 :       1870 :                     ChangesMade = true;
    2281                 :            :                 }
    2282                 :            :             }
    2283                 :            : 
    2284                 :   22822800 :             auto *CI = dyn_cast<CallInst>(&*it);
    2285         [ +  + ]:   22822800 :             if (!CI) {
    2286                 :   20584600 :                 ++it;
    2287                 :   20613900 :                 continue;
    2288                 :            :             }
    2289                 :    2238180 :             Value *callee = CI->getCalledOperand();
    2290   [ +  -  +  +  :    2238180 :             if (callee && (callee == gc_flush_func || callee == gc_preserve_begin_func
                   +  + ]
    2291         [ +  + ]:    2223940 :                         || callee == gc_preserve_end_func)) {
    2292                 :            :                 /* No replacement */
    2293   [ +  +  +  + ]:    2210070 :             } else if (pointer_from_objref_func != nullptr && callee == pointer_from_objref_func) {
    2294                 :      33023 :                 auto *obj = CI->getOperand(0);
    2295                 :      33023 :                 auto *ASCI = new AddrSpaceCastInst(obj, JuliaType::get_pjlvalue_ty(obj->getContext()), "", CI);
    2296                 :      33023 :                 ASCI->takeName(CI);
    2297                 :      33023 :                 CI->replaceAllUsesWith(ASCI);
    2298                 :      33023 :                 UpdatePtrNumbering(CI, ASCI, S);
    2299   [ +  +  +  + ]:    2177050 :             } else if (alloc_obj_func && callee == alloc_obj_func) {
    2300         [ -  + ]:      63997 :                 assert(CI->arg_size() == 3);
    2301                 :            : 
    2302                 :            :                 // Initialize an IR builder.
    2303                 :     127994 :                 IRBuilder<> builder(CI);
    2304                 :      63997 :                 builder.SetCurrentDebugLocation(CI->getDebugLoc());
    2305                 :            : 
    2306                 :            :                 // Create a call to the `julia.gc_alloc_bytes` intrinsic, which is like
    2307                 :            :                 // `julia.gc_alloc_obj` except it doesn't set the tag.
    2308                 :      63997 :                 auto allocBytesIntrinsic = getOrDeclare(jl_intrinsics::GCAllocBytes);
    2309                 :      63997 :                 auto ptlsLoad = get_current_ptls_from_task(builder, CI->getArgOperand(0), tbaa_gcframe);
    2310                 :      63997 :                 auto ptls = builder.CreateBitCast(ptlsLoad, Type::getInt8PtrTy(builder.getContext()));
    2311                 :     127994 :                 auto newI = builder.CreateCall(
    2312                 :            :                     allocBytesIntrinsic,
    2313                 :            :                     {
    2314                 :            :                         ptls,
    2315                 :      63997 :                         builder.CreateIntCast(
    2316                 :            :                             CI->getArgOperand(1),
    2317                 :            :                             allocBytesIntrinsic->getFunctionType()->getParamType(1),
    2318                 :            :                             false)
    2319                 :            :                     });
    2320                 :      63997 :                 newI->takeName(CI);
    2321                 :            : 
    2322                 :            :                 // LLVM alignment/bit check is not happy about addrspacecast and refuse
    2323                 :            :                 // to remove write barrier because of it.
    2324                 :            :                 // We pretty much only load using `T_size` so try our best to strip
    2325                 :            :                 // as many cast as possible.
    2326                 :      63997 :                 auto tag = CI->getArgOperand(2)->stripPointerCastsAndAliases();
    2327         [ +  + ]:      63997 :                 if (auto C = dyn_cast<ConstantExpr>(tag)) {
    2328         [ +  - ]:      24083 :                     if (C->getOpcode() == Instruction::IntToPtr) {
    2329                 :      24083 :                         tag = C->getOperand(0);
    2330                 :            :                     }
    2331                 :            :                 }
    2332         [ +  + ]:      39914 :                 else if (auto LI = dyn_cast<LoadInst>(tag)) {
    2333                 :            :                     // Make sure the load is correctly marked as aligned
    2334                 :            :                     // since LLVM might have removed them.
    2335                 :            :                     // We can't do this in general since the load might not be
    2336                 :            :                     // a type in other branches.
    2337                 :            :                     // However, it should be safe for us to do this on const globals
    2338                 :            :                     // which should be the important cases as well.
    2339                 :      39913 :                     bool task_local = false;
    2340   [ +  -  -  +  :      39913 :                     if (isLoadFromConstGV(LI, task_local) && getLoadValueAlign(LI) < 16) {
                   -  + ]
    2341                 :          0 :                         Type *T_int64 = Type::getInt64Ty(LI->getContext());
    2342                 :          0 :                         auto op = ConstantAsMetadata::get(ConstantInt::get(T_int64, 16));
    2343                 :          0 :                         LI->setMetadata(LLVMContext::MD_align,
    2344                 :          0 :                                         MDNode::get(LI->getContext(), { op }));
    2345                 :            :                     }
    2346                 :            :                 }
    2347                 :            :                 // As a last resort, if we didn't manage to strip down the tag
    2348                 :            :                 // for LLVM, emit an alignment assumption.
    2349                 :      63997 :                 auto tag_type = tag->getType();
    2350         [ +  + ]:      63997 :                 if (tag_type->isPointerTy()) {
    2351                 :      39914 :                     auto &DL = CI->getModule()->getDataLayout();
    2352                 :      39914 :                     auto align = tag->getPointerAlignment(DL).value();
    2353         [ -  + ]:      39914 :                     if (align < 16) {
    2354                 :            :                         // On 5 <= LLVM < 12, it is illegal to call this on
    2355                 :            :                         // non-integral pointer. This relies on stripping the
    2356                 :            :                         // non-integralness from datalayout before this pass
    2357                 :          0 :                         builder.CreateAlignmentAssumption(DL, tag, 16);
    2358                 :            :                     }
    2359                 :            :                 }
    2360                 :            :                 // Set the tag.
    2361                 :      63997 :                 StoreInst *store = builder.CreateAlignedStore(
    2362                 :      63997 :                     tag, EmitTagPtr(builder, tag_type, newI), Align(sizeof(size_t)));
    2363                 :      63997 :                 store->setOrdering(AtomicOrdering::Unordered);
    2364                 :      63997 :                 store->setMetadata(LLVMContext::MD_tbaa, tbaa_tag);
    2365                 :            : 
    2366                 :            :                 // Replace uses of the call to `julia.gc_alloc_obj` with the call to
    2367                 :            :                 // `julia.gc_alloc_bytes`.
    2368                 :      63997 :                 CI->replaceAllUsesWith(newI);
    2369                 :            : 
    2370                 :            :                 // Update the pointer numbering.
    2371                 :     127994 :                 UpdatePtrNumbering(CI, newI, S);
    2372   [ +  +  +  + ]:    2113050 :             } else if (typeof_func && callee == typeof_func) {
    2373         [ -  + ]:     125942 :                 assert(CI->arg_size() == 1);
    2374                 :     251884 :                 IRBuilder<> builder(CI);
    2375                 :     125942 :                 builder.SetCurrentDebugLocation(CI->getDebugLoc());
    2376                 :     125942 :                 auto tag = EmitLoadTag(builder, CI->getArgOperand(0));
    2377                 :     125942 :                 auto masked = builder.CreateAnd(tag, ConstantInt::get(T_size, ~(uintptr_t)15));
    2378                 :     251884 :                 auto typ = builder.CreateAddrSpaceCast(builder.CreateIntToPtr(masked, JuliaType::get_pjlvalue_ty(masked->getContext())),
    2379                 :     125942 :                                                        T_prjlvalue);
    2380                 :     125942 :                 typ->takeName(CI);
    2381                 :     125942 :                 CI->replaceAllUsesWith(typ);
    2382                 :     251884 :                 UpdatePtrNumbering(CI, typ, S);
    2383   [ +  +  +  + ]:    1987110 :             } else if ((write_barrier_func && callee == write_barrier_func) ||
    2384   [ +  +  +  + ]:    1958430 :                        (write_barrier_binding_func && callee == write_barrier_binding_func)) {
    2385                 :            :                 // The replacement for this requires creating new BasicBlocks
    2386                 :            :                 // which messes up the loop. Queue all of them to be replaced later.
    2387         [ -  + ]:      29335 :                 assert(CI->arg_size() >= 1);
    2388                 :      29335 :                 write_barriers.push_back(CI);
    2389                 :      29335 :                 ChangesMade = true;
    2390                 :      29335 :                 ++it;
    2391                 :      29335 :                 continue;
    2392   [ +  +  +  + ]:    1957780 :             } else if ((call_func && callee == call_func) ||
    2393   [ +  +  +  + ]:    1824330 :                        (call2_func && callee == call2_func)) {
    2394         [ -  + ]:     133620 :                 assert(T_prjlvalue);
    2395                 :     133620 :                 size_t nargs = CI->arg_size();
    2396                 :     133620 :                 size_t nframeargs = nargs-1;
    2397         [ +  + ]:     133620 :                 if (callee == call_func)
    2398                 :     133446 :                     nframeargs -= 1;
    2399         [ +  - ]:        174 :                 else if (callee == call2_func)
    2400                 :        174 :                     nframeargs -= 2;
    2401                 :     267240 :                 SmallVector<Value*, 4> ReplacementArgs;
    2402                 :     133620 :                 auto arg_it = CI->arg_begin();
    2403         [ -  + ]:     133620 :                 assert(arg_it != CI->arg_end());
    2404                 :     133620 :                 Value *new_callee = *(arg_it++);
    2405         [ -  + ]:     133620 :                 assert(arg_it != CI->arg_end());
    2406                 :     133620 :                 ReplacementArgs.push_back(*(arg_it++));
    2407         [ +  + ]:     133620 :                 if (callee == call2_func) {
    2408         [ -  + ]:        174 :                     assert(arg_it != CI->arg_end());
    2409                 :        174 :                     ReplacementArgs.push_back(*(arg_it++));
    2410                 :            :                 }
    2411                 :     133620 :                 maxframeargs = std::max(maxframeargs, nframeargs);
    2412                 :     133620 :                 int slot = 0;
    2413                 :     267240 :                 IRBuilder<> Builder (CI);
    2414         [ +  + ]:     451492 :                 for (; arg_it != CI->arg_end(); ++arg_it) {
    2415                 :            :                     // Julia emits IR with proper pointer types here, but because
    2416                 :            :                     // the julia.call signature is varargs, the optimizer is allowed
    2417                 :            :                     // to rewrite pointee types. It'll go away with opaque pointer
    2418                 :            :                     // types anyway.
    2419                 :     635744 :                     Builder.CreateAlignedStore(Builder.CreateBitCast(*arg_it, T_prjlvalue),
    2420                 :     317872 :                             Builder.CreateInBoundsGEP(T_prjlvalue, Frame, ConstantInt::get(T_int32, slot++)),
    2421                 :     635744 :                             Align(sizeof(void*)));
    2422                 :            :                 }
    2423         [ +  + ]:     134661 :                 ReplacementArgs.push_back(nframeargs == 0 ?
    2424                 :       1041 :                     (llvm::Value*)ConstantPointerNull::get(T_pprjlvalue) :
    2425                 :            :                     (llvm::Value*)Frame);
    2426                 :     133620 :                 ReplacementArgs.push_back(ConstantInt::get(T_int32, nframeargs));
    2427         [ +  + ]:     133620 :                 if (callee == call2_func) {
    2428                 :            :                     // move trailing arg to the end now
    2429                 :        174 :                     Value *front = ReplacementArgs.front();
    2430                 :        174 :                     ReplacementArgs.erase(ReplacementArgs.begin());
    2431                 :        174 :                     ReplacementArgs.push_back(front);
    2432                 :            :                 }
    2433         [ +  + ]:     133620 :                 FunctionType *FTy = callee == call2_func ? JuliaType::get_jlfunc2_ty(CI->getContext()) : JuliaType::get_jlfunc_ty(CI->getContext());
    2434                 :     133620 :                 CallInst *NewCall = CallInst::Create(FTy, new_callee, ReplacementArgs, "", CI);
    2435                 :     133620 :                 NewCall->setTailCallKind(CI->getTailCallKind());
    2436                 :     133620 :                 auto callattrs = CI->getAttributes();
    2437                 :     133620 :                 callattrs = AttributeList::get(CI->getContext(), getFnAttrs(callattrs), getRetAttrs(callattrs), {});
    2438         [ +  - ]:     133620 :                 if (auto new_callee = CI->getCalledFunction()) // get the parameter attributes from the function target (if possible)
    2439                 :     133620 :                     callattrs = AttributeList::get(CI->getContext(), {callattrs, new_callee->getAttributes()});
    2440                 :     133620 :                 NewCall->setAttributes(callattrs);
    2441                 :     133620 :                 NewCall->takeName(CI);
    2442                 :     133620 :                 NewCall->copyMetadata(*CI);
    2443                 :     133620 :                 CI->replaceAllUsesWith(NewCall);
    2444                 :     267240 :                 UpdatePtrNumbering(CI, NewCall, S);
    2445         [ -  + ]:    1824160 :             } else if (CI->arg_size() == CI->getNumOperands()) {
    2446                 :            :                 /* No operand bundle to lower */
    2447                 :          0 :                 ++it;
    2448                 :          0 :                 continue;
    2449                 :            :             } else {
    2450                 :    1824160 :                 CallInst *NewCall = CallInst::Create(CI, None, CI);
    2451                 :    1824160 :                 NewCall->takeName(CI);
    2452                 :    1824160 :                 NewCall->copyMetadata(*CI);
    2453                 :    1824160 :                 CI->replaceAllUsesWith(NewCall);
    2454                 :    1824160 :                 UpdatePtrNumbering(CI, NewCall, S);
    2455                 :            :             }
    2456         [ +  + ]:    2208850 :             if (!CI->use_empty()) {
    2457                 :      13391 :                 CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
    2458                 :      13391 :                 UpdatePtrNumbering(CI, nullptr, S);
    2459                 :            :             }
    2460                 :    2208850 :             it = CI->eraseFromParent();
    2461                 :    2208850 :             ChangesMade = true;
    2462                 :            :         }
    2463                 :            :     }
    2464         [ +  + ]:     167146 :     for (auto CI : write_barriers) {
    2465                 :      29335 :         auto parent = CI->getArgOperand(0);
    2466         [ +  + ]:      29335 :         if (std::all_of(CI->op_begin() + 1, CI->op_end(),
    2467   [ +  -  +  + ]:      31290 :                     [parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) {
    2468                 :       1797 :             CI->eraseFromParent();
    2469                 :       1797 :             continue;
    2470                 :            :         }
    2471         [ -  + ]:      27538 :         if (CFGModified) {
    2472                 :          0 :             *CFGModified = true;
    2473                 :            :         }
    2474                 :      55076 :         IRBuilder<> builder(CI);
    2475                 :      27538 :         builder.SetCurrentDebugLocation(CI->getDebugLoc());
    2476                 :      27538 :         auto parBits = builder.CreateAnd(EmitLoadTag(builder, parent), 3);
    2477                 :      27538 :         auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, 3));
    2478                 :      27538 :         auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false);
    2479                 :      27538 :         builder.SetInsertPoint(mayTrigTerm);
    2480                 :      27538 :         Value *anyChldNotMarked = NULL;
    2481         [ +  + ]:      58953 :         for (unsigned i = 1; i < CI->arg_size(); i++) {
    2482                 :      31415 :             Value *child = CI->getArgOperand(i);
    2483                 :      31415 :             Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, child), 1);
    2484                 :      31415 :             Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0));
    2485         [ +  + ]:      31415 :             anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked;
    2486                 :            :         }
    2487         [ -  + ]:      27538 :         assert(anyChldNotMarked); // handled by all_of test above
    2488                 :      27538 :         MDBuilder MDB(parent->getContext());
    2489                 :      55076 :         SmallVector<uint32_t, 2> Weights{1, 9};
    2490                 :      27538 :         auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false,
    2491                 :            :                                                   MDB.createBranchWeights(Weights));
    2492                 :      27538 :         builder.SetInsertPoint(trigTerm);
    2493         [ +  + ]:      27538 :         if (CI->getCalledOperand() == write_barrier_func) {
    2494                 :      26899 :             builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent);
    2495                 :            :         }
    2496         [ +  - ]:        639 :         else if (CI->getCalledOperand() == write_barrier_binding_func) {
    2497                 :        639 :             builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCBinding), parent);
    2498                 :            :         }
    2499                 :            :         else {
    2500                 :          0 :             assert(false);
    2501                 :            :         }
    2502                 :      27538 :         CI->eraseFromParent();
    2503                 :            :     }
    2504   [ +  +  +  - ]:     137811 :     if (maxframeargs == 0 && Frame) {
    2505                 :     106501 :         Frame->eraseFromParent();
    2506                 :            :     }
    2507         [ +  - ]:      31310 :     else if (Frame) {
    2508                 :      31310 :         Frame->setOperand(0, ConstantInt::get(T_int32, maxframeargs));
    2509                 :            :     }
    2510                 :     137811 :     return ChangesMade;
    2511                 :            : }
    2512                 :            : 
    2513                 :     537707 : static void AddInPredLiveOuts(BasicBlock *BB, BitVector &LiveIn, State &S)
    2514                 :            : {
    2515                 :     537707 :     bool First = true;
    2516                 :     537707 :     std::set<BasicBlock *> Visited;
    2517                 :     537707 :     std::vector<BasicBlock *> WorkList;
    2518                 :     537707 :     WorkList.push_back(BB);
    2519         [ +  + ]:    6034430 :     while (!WorkList.empty()) {
    2520                 :    5650760 :         BB = &*WorkList.back();
    2521                 :    5650760 :         WorkList.pop_back();
    2522                 :            :         // Nothing is live at function entry
    2523         [ +  + ]:    5650760 :         if (BB == &S.F->getEntryBlock()) {
    2524                 :     152706 :             LiveIn.reset();
    2525                 :     152706 :             return;
    2526                 :            :         }
    2527         [ +  + ]:   13552100 :         for (BasicBlock *Pred : predecessors(BB)) {
    2528         [ +  + ]:    8055370 :             if (!Visited.insert(Pred).second)
    2529                 :    1605840 :                 continue;
    2530         [ +  + ]:    6449530 :             if (!S.BBStates[Pred].HasSafepoint) {
    2531                 :    5501100 :                 WorkList.push_back(Pred);
    2532                 :    5501100 :                 continue;
    2533                 :            :             } else {
    2534                 :     948432 :                 int LastSP = S.BBStates[Pred].Safepoints.front();
    2535         [ +  + ]:     948432 :                 if (First) {
    2536                 :     426272 :                     LiveIn |= S.LiveSets[LastSP];
    2537                 :     426272 :                     First = false;
    2538                 :            :                 } else {
    2539                 :     522160 :                     LiveIn &= S.LiveSets[LastSP];
    2540                 :            :                 }
    2541         [ +  + ]:     948432 :                 if (LiveIn.empty()) // Just a compiler performance optimization
    2542                 :       1334 :                     return;
    2543                 :            :             }
    2544                 :            :         }
    2545                 :            :     }
    2546                 :            : }
    2547                 :            : 
    2548                 :     454460 : void LateLowerGCFrame::PlaceGCFrameStore(State &S, unsigned R, unsigned MinColorRoot,
    2549                 :            :                                          const std::vector<int> &Colors, Value *GCFrame,
    2550                 :            :                                          Instruction *InsertBefore) {
    2551                 :            :     // Get the slot address.
    2552                 :     908920 :     auto slotAddress = CallInst::Create(
    2553                 :            :         getOrDeclare(jl_intrinsics::getGCFrameSlot),
    2554                 :     454460 :         {GCFrame, ConstantInt::get(Type::getInt32Ty(InsertBefore->getContext()), Colors[R] + MinColorRoot)},
    2555                 :            :         "", InsertBefore);
    2556                 :            : 
    2557                 :     454460 :     Value *Val = GetPtrForNumber(S, R, InsertBefore);
    2558                 :            :     // Pointee types don't have semantics, so the optimizer is
    2559                 :            :     // free to rewrite them if convenient. We need to change
    2560                 :            :     // it back here for the store.
    2561         [ -  + ]:     454460 :     if (Val->getType() != T_prjlvalue)
    2562                 :          0 :         Val = new BitCastInst(Val, T_prjlvalue, "", InsertBefore);
    2563                 :     454460 :     new StoreInst(Val, slotAddress, InsertBefore);
    2564                 :     454460 : }
    2565                 :            : 
    2566                 :      60842 : void LateLowerGCFrame::PlaceGCFrameStores(State &S, unsigned MinColorRoot,
    2567                 :            :                                           const std::vector<int> &Colors, Value *GCFrame)
    2568                 :            : {
    2569         [ +  + ]:    1562480 :     for (auto &BB : *S.F) {
    2570                 :    1501640 :         const BBState &BBS = S.BBStates[&BB];
    2571         [ +  + ]:    1501640 :         if (!BBS.HasSafepoint) {
    2572                 :     963930 :             continue;
    2573                 :            :         }
    2574                 :    1075410 :         BitVector LiveIn;
    2575                 :     537707 :         AddInPredLiveOuts(&BB, LiveIn, S);
    2576                 :     537707 :         const BitVector *LastLive = &LiveIn;
    2577                 :    1276500 :         for(auto rit = BBS.Safepoints.rbegin();
    2578         [ +  + ]:    1276500 :               rit != BBS.Safepoints.rend(); ++rit ) {
    2579                 :     738789 :             const BitVector &NowLive = S.LiveSets[*rit];
    2580         [ +  + ]:    1828650 :             for (int Idx = NowLive.find_first(); Idx >= 0; Idx = NowLive.find_next(Idx)) {
    2581         [ +  + ]:    1089860 :                 if (!HasBitSet(*LastLive, Idx)) {
    2582                 :     454460 :                     PlaceGCFrameStore(S, Idx, MinColorRoot, Colors, GCFrame,
    2583                 :     454460 :                       S.ReverseSafepointNumbering[*rit]);
    2584                 :            :                 }
    2585                 :            :             }
    2586                 :     738789 :             LastLive = &NowLive;
    2587                 :            :         }
    2588                 :            :     }
    2589                 :      60842 : }
    2590                 :            : 
    2591                 :     131071 : void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State &S, std::map<Value *, std::pair<int, int>>) {
    2592                 :     131071 :     auto F = S.F;
    2593                 :     131071 :     auto T_int32 = Type::getInt32Ty(F->getContext());
    2594                 :     131071 :     int MaxColor = -1;
    2595         [ +  + ]:    2077830 :     for (auto C : Colors)
    2596         [ +  + ]:    1946760 :         if (C > MaxColor)
    2597                 :      69664 :             MaxColor = C;
    2598                 :            : 
    2599                 :            :     // Insert instructions for the actual gc frame
    2600   [ +  +  +  +  :     131071 :     if (MaxColor != -1 || !S.Allocas.empty() || !S.ArrayAllocas.empty() || !S.TrackedStores.empty()) {
          +  +  +  +  +  
                      + ]
    2601                 :            :         // Create and push a GC frame.
    2602                 :     121684 :         auto gcframe = CallInst::Create(
    2603                 :            :             getOrDeclare(jl_intrinsics::newGCFrame),
    2604                 :      60842 :             {ConstantInt::get(T_int32, 0)},
    2605                 :            :             "gcframe");
    2606                 :      60842 :         gcframe->insertBefore(&*F->getEntryBlock().begin());
    2607                 :            : 
    2608                 :     121684 :         auto pushGcframe = CallInst::Create(
    2609                 :            :             getOrDeclare(jl_intrinsics::pushGCFrame),
    2610                 :      60842 :             {gcframe, ConstantInt::get(T_int32, 0)});
    2611                 :      60842 :         pushGcframe->insertAfter(pgcstack);
    2612                 :            : 
    2613                 :            :         // Replace Allocas
    2614                 :      60842 :         unsigned AllocaSlot = 2; // first two words are metadata
    2615                 :     367220 :         auto replace_alloca = [this, gcframe, &AllocaSlot, T_int32](AllocaInst *&AI) {
    2616                 :            :             // Pick a slot for the alloca.
    2617                 :      91805 :             unsigned align = AI->getAlignment() / sizeof(void*); // TODO: use DataLayout pointer size
    2618         [ -  + ]:      91805 :             assert(align <= 16 / sizeof(void*) && "Alignment exceeds llvm-final-gc-lowering abilities");
    2619         [ -  + ]:      91805 :             if (align > 1)
    2620                 :          0 :                 AllocaSlot = LLT_ALIGN(AllocaSlot, align);
    2621                 :     183610 :             Instruction *slotAddress = CallInst::Create(
    2622                 :            :                 getOrDeclare(jl_intrinsics::getGCFrameSlot),
    2623                 :      91805 :                 {gcframe, ConstantInt::get(T_int32, AllocaSlot - 2)});
    2624                 :      91805 :             slotAddress->insertAfter(gcframe);
    2625                 :      91805 :             slotAddress->takeName(AI);
    2626                 :            : 
    2627                 :            :             // Check for lifetime intrinsics on this alloca, we can't keep them
    2628                 :            :             // because we're changing the semantics
    2629                 :      91805 :             std::vector<CallInst*> ToDelete;
    2630                 :      91805 :             RecursivelyVisit<IntrinsicInst>([&](Use &VU) {
    2631                 :       4761 :                 IntrinsicInst *II = cast<IntrinsicInst>(VU.getUser());
    2632   [ +  -  +  -  :       9522 :                 if ((II->getIntrinsicID() != Intrinsic::lifetime_start &&
                   +  - ]
    2633                 :       4761 :                             II->getIntrinsicID() != Intrinsic::lifetime_end))
    2634                 :       4761 :                     return;
    2635                 :          0 :                 ToDelete.push_back(II);
    2636                 :            :             }, AI);
    2637         [ -  + ]:      91805 :             for (CallInst *II : ToDelete) {
    2638                 :          0 :                 II->eraseFromParent();
    2639                 :            :             }
    2640         [ +  + ]:      91805 :             if (slotAddress->getType() != AI->getType()) {
    2641                 :            :                 // If we're replacing an ArrayAlloca, the pointer element type may need to be fixed up
    2642                 :      22517 :                 auto BCI  = new BitCastInst(slotAddress, AI->getType());
    2643                 :      22517 :                 BCI->insertAfter(slotAddress);
    2644                 :      22517 :                 slotAddress = BCI;
    2645                 :            :             }
    2646                 :      91805 :             AI->replaceAllUsesWith(slotAddress);
    2647                 :      91805 :             AI->eraseFromParent();
    2648                 :      91805 :             AI = NULL;
    2649                 :     152647 :         };
    2650         [ +  + ]:     130130 :         for (AllocaInst *AI : S.Allocas) {
    2651                 :      69288 :             auto ns = cast<ConstantInt>(AI->getArraySize())->getZExtValue();
    2652                 :      69288 :             replace_alloca(AI);
    2653                 :      69288 :             AllocaSlot += ns;
    2654                 :            :         }
    2655         [ +  + ]:      83359 :         for (auto AI : S.ArrayAllocas) {
    2656                 :      22517 :             replace_alloca(AI.first);
    2657                 :      22517 :             AllocaSlot += AI.second;
    2658                 :            :         }
    2659         [ +  + ]:      67712 :         for (auto Store : S.TrackedStores) {
    2660                 :       6870 :             auto SI = Store.first;
    2661                 :       6870 :             auto Base = SI->getValueOperand();
    2662                 :            :             //auto Tracked = TrackCompositeType(Base->getType());
    2663         [ +  + ]:      21447 :             for (unsigned i = 0; i < Store.second; ++i) {
    2664                 :      29154 :                 auto slotAddress = CallInst::Create(
    2665                 :            :                     getOrDeclare(jl_intrinsics::getGCFrameSlot),
    2666                 :      14577 :                     {gcframe, ConstantInt::get(T_int32, AllocaSlot - 2)});
    2667                 :      14577 :                 slotAddress->insertAfter(gcframe);
    2668         [ -  + ]:      14577 :                 auto ValExpr = std::make_pair(Base, isa<PointerType>(Base->getType()) ? -1 : i);
    2669                 :      14577 :                 auto Elem = MaybeExtractScalar(S, ValExpr, SI);
    2670         [ -  + ]:      14577 :                 if (Elem->getType() != T_prjlvalue)
    2671                 :          0 :                     Elem = new BitCastInst(Elem, T_prjlvalue, "", SI);
    2672                 :            :                 //auto Idxs = makeArrayRef(Tracked[i]);
    2673                 :            :                 //Value *Elem = ExtractScalar(Base, true, Idxs, SI);
    2674                 :      14577 :                 Value *shadowStore = new StoreInst(Elem, slotAddress, SI);
    2675                 :            :                 (void)shadowStore;
    2676                 :            :                 // TODO: shadowStore->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
    2677                 :      14577 :                 AllocaSlot++;
    2678                 :            :             }
    2679                 :            :         }
    2680                 :      60842 :         auto NRoots = ConstantInt::get(T_int32, MaxColor + 1 + AllocaSlot - 2);
    2681                 :      60842 :         gcframe->setArgOperand(0, NRoots);
    2682                 :      60842 :         pushGcframe->setArgOperand(1, NRoots);
    2683                 :            : 
    2684                 :            :         // Insert GC frame stores
    2685                 :      60842 :         PlaceGCFrameStores(S, AllocaSlot - 2, Colors, gcframe);
    2686                 :            :         // Insert GCFrame pops
    2687         [ +  + ]:    1562480 :         for(Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
    2688         [ +  + ]:    1501640 :             if (isa<ReturnInst>(I->getTerminator())) {
    2689                 :      58182 :                 auto popGcframe = CallInst::Create(
    2690                 :            :                     getOrDeclare(jl_intrinsics::popGCFrame),
    2691                 :            :                     {gcframe});
    2692                 :      58182 :                 popGcframe->insertBefore(I->getTerminator());
    2693                 :            :             }
    2694                 :            :         }
    2695                 :            :     }
    2696                 :     131071 : }
    2697                 :            : 
    2698                 :     137811 : bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) {
    2699                 :     137811 :     initAll(*F.getParent());
    2700                 :            :     LLVM_DEBUG(dbgs() << "GC ROOT PLACEMENT: Processing function " << F.getName() << "\n");
    2701         [ +  + ]:     137811 :     if (!pgcstack_getter)
    2702                 :       5557 :         return CleanupIR(F, nullptr, CFGModified);
    2703                 :            : 
    2704                 :     132254 :     pgcstack = getPGCstack(F);
    2705         [ +  + ]:     132254 :     if (!pgcstack)
    2706                 :       1183 :         return CleanupIR(F, nullptr, CFGModified);
    2707                 :            : 
    2708                 :     262142 :     State S = LocalScan(F);
    2709                 :     131071 :     ComputeLiveness(S);
    2710                 :     262142 :     std::vector<int> Colors = ColorRoots(S);
    2711                 :     131071 :     std::map<Value *, std::pair<int, int>> CallFrames; // = OptimizeCallFrames(S, Ordering);
    2712                 :     131071 :     PlaceRootsAndUpdateCalls(Colors, S, CallFrames);
    2713                 :     131071 :     CleanupIR(F, &S, CFGModified);
    2714                 :     131071 :     return true;
    2715                 :            : }
    2716                 :            : 
    2717                 :     137811 : bool LateLowerGCFrameLegacy::runOnFunction(Function &F) {
    2718                 :      27215 :     auto GetDT = [this]() -> DominatorTree & {
    2719                 :      27215 :         return getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    2720                 :     137811 :     };
    2721                 :     137811 :     auto lateLowerGCFrame = LateLowerGCFrame(GetDT);
    2722                 :     137811 :     return lateLowerGCFrame.runOnFunction(F);
    2723                 :            : }
    2724                 :            : 
    2725                 :          0 : PreservedAnalyses LateLowerGC::run(Function &F, FunctionAnalysisManager &AM)
    2726                 :            : {
    2727                 :          0 :     auto GetDT = [&AM, &F]() -> DominatorTree & {
    2728                 :          0 :         return AM.getResult<DominatorTreeAnalysis>(F);
    2729                 :          0 :     };
    2730                 :          0 :     auto lateLowerGCFrame = LateLowerGCFrame(GetDT);
    2731                 :          0 :     bool CFGModified = false;
    2732         [ #  # ]:          0 :     if (lateLowerGCFrame.runOnFunction(F, &CFGModified)) {
    2733         [ #  # ]:          0 :         if (CFGModified) {
    2734                 :          0 :             return PreservedAnalyses::none();
    2735                 :            :         } else {
    2736                 :          0 :             return PreservedAnalyses::allInSet<CFGAnalyses>();
    2737                 :            :         }
    2738                 :            :     }
    2739                 :          0 :     return PreservedAnalyses::all();
    2740                 :            : }
    2741                 :            : 
    2742                 :            : 
    2743                 :            : char LateLowerGCFrameLegacy::ID = 0;
    2744                 :            : static RegisterPass<LateLowerGCFrameLegacy> X("LateLowerGCFrame", "Late Lower GCFrame Pass", false, false);
    2745                 :            : 
    2746                 :         17 : Pass *createLateLowerGCFramePass() {
    2747                 :         17 :     return new LateLowerGCFrameLegacy();
    2748                 :            : }
    2749                 :            : 
    2750                 :          0 : extern "C" JL_DLLEXPORT void LLVMExtraAddLateLowerGCFramePass_impl(LLVMPassManagerRef PM)
    2751                 :            : {
    2752                 :          0 :     unwrap(PM)->add(createLateLowerGCFramePass());
    2753                 :          0 : }

Generated by: LCOV version 1.14