Branch data Line data Source code
1 : : // This file is a part of Julia. License is MIT: https://julialang.org/license
2 : :
3 : : #include "llvm-version.h"
4 : : #include "passes.h"
5 : :
6 : : #include <llvm-c/Core.h>
7 : : #include <llvm-c/Types.h>
8 : :
9 : : #include <llvm/ADT/BitVector.h>
10 : : #include <llvm/ADT/PostOrderIterator.h>
11 : : #include <llvm/ADT/SetVector.h>
12 : : #include <llvm/ADT/SmallVector.h>
13 : : #include "llvm/Analysis/CFG.h"
14 : : #include <llvm/IR/Value.h>
15 : : #include <llvm/IR/Constants.h>
16 : : #include <llvm/IR/Dominators.h>
17 : : #include <llvm/IR/Function.h>
18 : : #include <llvm/IR/Instructions.h>
19 : : #include <llvm/IR/IntrinsicInst.h>
20 : : #include <llvm/IR/LegacyPassManager.h>
21 : : #include <llvm/IR/MDBuilder.h>
22 : : #include <llvm/IR/Module.h>
23 : : #include <llvm/IR/IRBuilder.h>
24 : : #include <llvm/IR/Verifier.h>
25 : : #include <llvm/Pass.h>
26 : : #include <llvm/Support/Debug.h>
27 : : #include <llvm/Transforms/Utils/BasicBlockUtils.h>
28 : : #include <llvm/Transforms/Utils/ModuleUtils.h>
29 : :
30 : : #include <llvm/InitializePasses.h>
31 : :
32 : : #include "codegen_shared.h"
33 : : #include "julia.h"
34 : : #include "julia_internal.h"
35 : : #include "julia_assert.h"
36 : : #include "llvm-pass-helpers.h"
37 : : #include <map>
38 : :
39 : : #define DEBUG_TYPE "late_lower_gcroot"
40 : :
41 : : using namespace llvm;
42 : :
43 : : /* Julia GC Root Placement pass. For a general overview of the design of GC
44 : : root lowering, see the devdocs. This file is the actual implementation.
45 : :
46 : : The actual algorithm is fairly straightforward. First recall the goal of this
47 : : pass:
48 : :
49 : : Minimize the number of needed gc roots/stores to them subject to the constraint
50 : : that at every safepoint, any live gc-tracked pointer (i.e. for which there is
51 : : a path after this point that contains a use of this pointer) is in some gc slot.
52 : :
53 : : In particular, in order to understand this algorithm, it is important to
54 : : realize that the only places where rootedness matters are at safepoints.
55 : :
56 : : Now, the primary phases of the algorithm are:
57 : :
58 : : 1. Local Scan
59 : :
60 : : During this step, each Basic Block is inspected and analyzed for local
61 : : properties. In particular, we want to determine the ordering of any of
62 : : the following activities:
63 : :
64 : : - Any Def of a gc-tracked pointer. In general Defs are the results of
65 : : calls or loads from appropriate memory locations. Phi nodes and
66 : : selects do complicate this story slightly as described below.
67 : : - Any use of a gc-tracked or derived pointer. As described in the
68 : : devdocs, a use is in general one of
69 : : a) a load from a tracked/derived value
70 : : b) a store to a tracked/derived value
71 : : c) a store OF a tracked/derived value
72 : : d) a use of a value as a call operand (including operand bundles)
73 : : - Any safepoint
74 : :
75 : : Crucially, we also perform pointer numbering during the local scan,
76 : : assigning every Def a unique integer and caching the integer for each
77 : : derived pointer. This allows us to operate only on the set of Defs (
78 : : represented by these integers) for the rest of the algorithm. We also
79 : : maintain some local utility information that is needed by later passes
80 : : (see the BBState struct for details).
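 : :
 : :        For illustration (a hypothetical snippet; the function names are
 : :        made up), consider a block like
 : :
 : :           %x = call %jl_value_t addrspace(10)* @allocate()    ; Def of %x
 : :           call void @do_safepoint()                           ; safepoint
 : :           call void @use_value(%jl_value_t addrspace(10)* %x) ; use of %x
 : :
 : :        The local scan records that the def of %x precedes the safepoint and
 : :        that the use follows it, so this block alone tells us %x must be
 : :        rooted across that safepoint.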
81 : :
82 : : 2. Dataflow Computation
83 : :
84 : : This computation operates entirely over the function's control flow graph
85 : : and does not look into a basic block. The algorithm is essentially
86 : : textbook iterative data flow for liveness computation. However, the
87 : : data flow equations are slightly more complicated because we also
88 : : forward propagate rootedness information in addition to backpropagating
89 : : liveness.
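 : :
 : :        As a rough sketch (stated in terms of the per-block bit vectors kept
 : :        in the BBState struct below), the backward liveness part of the
 : :        iteration is essentially
 : :
 : :           LiveOut(BB) = PhiOuts(BB) | union over successors S of LiveIn(S)
 : :           LiveIn(BB)  = UpExposedUses(BB) | (LiveOut(BB) & ~Defs(BB))
 : :
 : :        repeated until a fixed point is reached.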
90 : :
91 : : 3. Live Set Computation
92 : :
93 : : With the liveness information from the previous step, we can now compute,
94 : : for every safepoint, the set of values live at that particular safepoint.
95 : : There are three pieces of information being combined here:
96 : : i. Values that needed to be live due to local analysis (e.g. there
97 : : was a def, then a safepoint, then a use). This was computed during
98 : : local analysis.
99 : : ii. Values that are live across the basic block (i.e. they are live
100 : : at every safepoint within the basic block). This relies entirely
101 : : on the liveness information.
102 : : iii. Values that are now live-out from the basic block (i.e. they are
103 : : live at every safepoint following their def). During local
104 : : analysis, we keep, for every safepoint, those values that would
105 : : be live if they were live out. Here we can check if they are
106 : : actually live-out and make the appropriate additions to the live
107 : : set.
108 : :
109 : : Lastly, we also explicitly compute, for each value, the list of values
110 : : that are simultaneously live at some safepoint. This is known as an
111 : : "interference graph" and is the input to the next step.
112 : :
113 : : 4. GC Root coloring
114 : :
115 : : Two values which are not simultaneously live at a safepoint can share the
116 : : same slot. This is an important optimization, because otherwise long
117 : : functions would have exceptionally large GC frames, reducing performance
118 : : and bloating the size of the stack. Assigning values to these slots is
119 : : equivalent to doing graph coloring on the interference graph - the graph
120 : : where nodes are values and two values have an edge if they are
121 : : simultaneously live at a safepoint - which we computed in the previous
122 : : step. Now graph coloring in general is a hard problem. However, for SSA
123 : : form programs (and most programs in general, by virtue of their
124 : : structure), the resulting interference graphs are chordal and can be
125 : : colored optimally in linear time by performing greedy coloring in a
126 : : perfect elimination order. Now, our interference graphs are likely not
127 : : entirely chordal due to some non-SSA corner cases. However, using the same
128 : : algorithm should still give a very good coloring while having sufficiently
129 : : low runtime.
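 : :
 : :        As a small worked example: suppose values 1, 2 and 3 are tracked,
 : :        1 and 2 are live together at one safepoint, 2 and 3 are live together
 : :        at a later safepoint, but 1 and 3 are never live simultaneously. The
 : :        interference graph then has only the edges 1-2 and 2-3, so greedy
 : :        coloring can place 1 and 3 in slot 0 and 2 in slot 1, using two GC
 : :        frame slots instead of three.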
130 : :
131 : : 5. JLCall frame optimizations
132 : :
133 : : Unlike earlier iterations of the gc root placement logic, jlcall frames
134 : : are no longer treated as a special case and need not necessarily be sunk
135 : : into the gc frame. Additionally, we now emit lifetime
136 : : intrinsics, so regular stack slot coloring will merge any jlcall frames
137 : : not sunk into the gc frame. Nevertheless performing such sinking can still
138 : : be profitable. Since all arguments to a jlcall are guaranteed to be live
139 : : at that call in some gc slot, we can attempt to rearrange the slots within
140 : : the gc-frame, or re-use slots not assigned at that particular location
141 : : for the gcframe. However, even without this optimization, stack frames
142 : : are at most two times larger than optimal (because regular stack coloring
143 : : can merge the jlcall allocas).
144 : :
145 : : N.B.: This step is not yet implemented.
146 : :
147 : : 6. Root placement
148 : :
149 : : This performs the actual insertion of the GCFrame pushes/pops, zeros out
150 : : the gc frame and creates the stores to the gc frame according to the
151 : : stack slot assignment computed in the previous step. GC frame stores
152 : : are generally sunk right before the first safepoint that uses them
153 : : (this is beneficial for code where the primary path does not have
154 : : safepoints, but some other path - e.g. the error path does). However,
155 : : if the first safepoint is not dominated by the definition (this can
156 : : happen due to the non-ssa corner cases), the store is inserted right after
157 : : the definition.
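 : :
 : :        Roughly, the emitted code has the following shape (a sketch only; the
 : :        julia.* frame intrinsics shown are inserted by this pass and lowered
 : :        later by the final GC lowering pass):
 : :
 : :           %gcframe = call %jl_value_t addrspace(10)** @julia.new_gc_frame(i32 %nslots)
 : :           call void @julia.push_gc_frame(%jl_value_t addrspace(10)** %gcframe, i32 %nslots)
 : :           ...
 : :           %slot = call %jl_value_t addrspace(10)** @julia.get_gc_frame_slot(%jl_value_t addrspace(10)** %gcframe, i32 %color)
 : :           store %jl_value_t addrspace(10)* %val, %jl_value_t addrspace(10)** %slot
 : :           ... safepoint that needs %val rooted ...
 : :           call void @julia.pop_gc_frame(%jl_value_t addrspace(10)** %gcframe)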
158 : :
159 : : 7. Cleanup
160 : :
161 : : This step performs necessary cleanup before passing the IR to codegen. In
162 : : particular, it removes any calls to the julia.pointer_from_objref intrinsic and
163 : : removes the extra operand bundles from ccalls. In the future it could
164 : : also strip the addrspace information from all values as this
165 : : information is no longer needed.
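 : :
 : :        For example, a ccall that codegen emitted with a "jl_roots" operand
 : :        bundle listing its GC-tracked arguments (callee name hypothetical):
 : :
 : :           call void @some_ccall(i8* %p) [ "jl_roots"(%jl_value_t addrspace(10)* %x) ]
 : :
 : :        has that bundle stripped here, once %x has been given a GC frame slot.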
166 : :
167 : :
168 : : There are a couple important special cases that deserve special attention:
169 : :
170 : : A. PHIs and Selects
171 : :
172 : : In general PHIs and selects are treated as separate defs for the purposes
173 : : of the algorithm and their operands as uses of those values. It is
174 : : important to consider however WHERE the uses of PHI's operands are
175 : : located. It is neither at the start of the basic block, because the values
176 : : do not dominate the block (so can't really consider them live-in), nor
177 : : at the end of the predecessor (because they are actually live out).
178 : : Instead it is best to think of those uses as living on the edge between
179 : : the appropriate predecessor and the block containing the PHI.
180 : :
181 : : Another concern is PHIs of derived values. Since we cannot simply root
182 : : these values by storing them to a GC slot, we need to insert a new,
183 : : artificial PHI that tracks the base pointers for the derived values. E.g.
184 : : in:
185 : :
186 : : A:
187 : : %Abase = load addrspace(10) *...
188 : : %Aderived = addrspacecast %Abase to addrspace(11)
189 : : B:
190 : : %Bbase = load addrspace(10) *...
191 : : %Bderived = addrspacecast %Bbase to addrspace(11)
192 : : C:
193 : : %phi = phi [%Aderived, %A],
194 : :            [%Bderived, %B]
195 : :
196 : : we will insert another phi in C to track the relevant base pointers:
197 : :
198 : : %philift = phi [%Abase, %A],
199 : :                [%Bbase, %B]
200 : :
201 : : We then pretend, for the purposes of numbering that %phi was derived from
202 : : %philift. Note that in order to be able to do this, we need to be able to
203 : : perform this lifting either during numbering or instruction scanning.
204 : :
205 : : B. Vectors of pointers/Union representations
206 : :
207 : : Since this pass runs very late in the pass pipeline, it runs after the
208 : : various vectorization passes. As a result, we have to potentially deal
209 : : with vectors of gc-tracked pointers. For the purposes of most of the
210 : : algorithm, we simply assign every element of the vector a separate number
211 : : and no changes are needed. However, those parts of the algorithm that
212 : : look at IR need to be aware of the possibility of encountering vectors of
213 : : pointers.
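 : :
 : :        For example, a value of type <4 x %jl_value_t addrspace(10)*> simply
 : :        gets four pointer numbers, one per lane.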
214 : :
215 : : Similarly, unions (e.g. in call returns) are represented as a struct of
216 : : a gc-tracked value and an argument selector. We simply assign a single
217 : : number to this struct and proceed as if it was a single pointer. However,
218 : : this again requires care at the IR level.
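 : :
 : :        For example, such a return value typically has a type like
 : :        { %jl_value_t addrspace(10)*, i8 }, where the i8 is the selector; the
 : :        struct gets one number for its tracked field.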
219 : :
220 : : C. Non mem2reg'd allocas
221 : :
222 : : Under some circumstances, allocas will still be present in the IR when
223 : : we get to this pass. We don't try very hard to handle this case, and
224 : : simply sink the alloca into the GCFrame.
225 : : */
226 : :
227 : : struct BBState {
228 : : // Defs and uses in this BB
229 : : // These do not get updated after local analysis
230 : : BitVector Defs;
231 : : BitVector PhiOuts;
232 : : BitVector UpExposedUses;
233 : : // These get updated during dataflow
234 : : BitVector LiveIn;
235 : : BitVector LiveOut;
236 : : std::vector<int> Safepoints;
237 : : int TopmostSafepoint = -1;
238 : : bool HasSafepoint = false;
239 : : // Have we gone through this basic block in our local scan yet?
240 : : bool Done = false;
241 : : };
242 : :
243 : : struct State {
244 : : Function *const F;
245 : : DominatorTree *DT;
246 : :
247 : : // The maximum assigned value number
248 : : int MaxPtrNumber;
249 : : // The maximum assigned safepoint number
250 : : int MaxSafepointNumber;
251 : : // Cache of numbers assigned to IR values. This includes caching of numbers
252 : : // for derived values
253 : : std::map<Value *, int> AllPtrNumbering;
254 : : std::map<Value *, std::vector<int>> AllCompositeNumbering;
255 : : // The reverse of the previous maps
256 : : std::map<int, Value *> ReversePtrNumbering;
257 : : // Neighbors in the coloring interference graph. I.e. for each value, the
258 : : // indices of other values that are used simultaneously at some safe point.
259 : : std::vector<SetVector<int>> Neighbors;
260 : : // The result of the local analysis
261 : : std::map<BasicBlock *, BBState> BBStates;
262 : :
263 : : // Refinement map. If all of the values a key maps to are rooted
264 : : // (-1 means an externally rooted value and -2 means a globally/permanently rooted value),
265 : : // then the key is already rooted (but not the other way around).
266 : : // A value that can be refined to -2 never needs any rooting or write barrier.
267 : : // A value that can be refined to -1 doesn't need a local root but still needs a write barrier.
268 : : // At the end of `LocalScan` this map has a few properties:
269 : : // 1. Values are either < 0 or dominate the key
270 : : // 2. Therefore this is a DAG
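 : : // For example (with hypothetical numbering): Refinements[5] = {-2} means value 5
 : : // is permanently rooted, while Refinements[7] = {3, 4} means value 7 is rooted
 : : // whenever both 3 and 4 are rooted.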
271 : : std::map<int, SmallVector<int, 1>> Refinements;
272 : :
273 : : // GC preserves map. All safepoints that are dominated by the map key, but not
274 : : // by any of its uses, need to preserve the values listed in the map value.
275 : : std::map<Instruction *, std::vector<int>> GCPreserves;
276 : :
277 : : // The assignment of numbers to safepoints. The values in this map
278 : : // are indices into the per-safepoint vectors below, which store safepoint properties.
279 : : std::map<Instruction *, int> SafepointNumbering;
280 : :
281 : : // Reverse mapping index -> safepoint
282 : : std::vector<Instruction *> ReverseSafepointNumbering;
283 : :
284 : : // Instructions that can return twice. For now, all values live at these
285 : : // instructions will get their own, dedicated GC frame slots, because they
286 : : // have unobservable control flow, so we can't be sure where they're
287 : : // actually live. All of these are also considered safepoints.
288 : : std::vector<Instruction *> ReturnsTwice;
289 : :
290 : : // The set of values live at a particular safepoint
291 : : std::vector<BitVector> LiveSets;
292 : : // Those values that - if live out from our parent basic block - are live
293 : : // at this safepoint.
294 : : std::vector<std::vector<int>> LiveIfLiveOut;
295 : : // The set of values that are kept alive by the callee.
296 : : std::vector<std::vector<int>> CalleeRoots;
297 : : // We don't bother doing liveness on Allocas that were not mem2reg'ed;
298 : : // they just get directly sunk into the root array.
299 : : std::vector<AllocaInst *> Allocas;
300 : : DenseMap<AllocaInst *, unsigned> ArrayAllocas;
301 : : DenseMap<AllocaInst *, AllocaInst *> ShadowAllocas;
302 : : std::vector<std::pair<StoreInst *, unsigned>> TrackedStores;
303 : 685627 : State(Function &F) : F(&F), DT(nullptr), MaxPtrNumber(-1), MaxSafepointNumber(-1) {}
304 : : };
305 : :
306 : :
307 : :
308 : : struct LateLowerGCFrameLegacy: public FunctionPass {
309 : : static char ID;
310 : 1052 : LateLowerGCFrameLegacy() : FunctionPass(ID) {}
311 : :
312 : : protected:
313 : 1052 : void getAnalysisUsage(AnalysisUsage &AU) const override {
314 : 1052 : FunctionPass::getAnalysisUsage(AU);
315 : 1052 : AU.addRequired<DominatorTreeWrapperPass>();
316 : 1052 : AU.addPreserved<DominatorTreeWrapperPass>();
317 : 1052 : AU.setPreservesCFG();
318 : 1052 : }
319 : :
320 : : private:
321 : : bool runOnFunction(Function &F) override;
322 : : };
323 : :
324 : : struct LateLowerGCFrame: private JuliaPassContext {
325 : : function_ref<DominatorTree &()> GetDT;
326 : 689604 : LateLowerGCFrame(function_ref<DominatorTree &()> GetDT) : GetDT(GetDT) {}
327 : :
328 : : public:
329 : : bool runOnFunction(Function &F, bool *CFGModified = nullptr);
330 : :
331 : : private:
332 : : CallInst *pgcstack;
333 : :
334 : : void MaybeNoteDef(State &S, BBState &BBS, Value *Def, const std::vector<int> &SafepointsSoFar, SmallVector<int, 1> &&RefinedPtr = SmallVector<int, 1>());
335 : : void NoteUse(State &S, BBState &BBS, Value *V, BitVector &Uses);
336 : 35548800 : void NoteUse(State &S, BBState &BBS, Value *V) {
337 : 35548800 : NoteUse(S, BBS, V, BBS.UpExposedUses);
338 : 35548800 : }
339 : :
340 : : void LiftPhi(State &S, PHINode *Phi);
341 : : void LiftSelect(State &S, SelectInst *SI);
342 : : Value *MaybeExtractScalar(State &S, std::pair<Value*,int> ValExpr, Instruction *InsertBefore);
343 : : std::vector<Value*> MaybeExtractVector(State &S, Value *BaseVec, Instruction *InsertBefore);
344 : : Value *GetPtrForNumber(State &S, unsigned Num, Instruction *InsertBefore);
345 : :
346 : : int Number(State &S, Value *V);
347 : : int NumberBase(State &S, Value *Base);
348 : : std::vector<int> NumberAll(State &S, Value *V);
349 : : std::vector<int> NumberAllBase(State &S, Value *Base);
350 : :
351 : : void NoteOperandUses(State &S, BBState &BBS, User &UI);
352 : : void MaybeTrackDst(State &S, MemTransferInst *MI);
353 : : void MaybeTrackStore(State &S, StoreInst *I);
354 : : State LocalScan(Function &F);
355 : : void ComputeLiveness(State &S);
356 : : void ComputeLiveSets(State &S);
357 : : std::vector<int> ColorRoots(const State &S);
358 : : void PlaceGCFrameStore(State &S, unsigned R, unsigned MinColorRoot, const std::vector<int> &Colors, Value *GCFrame, Instruction *InsertBefore);
359 : : void PlaceGCFrameStores(State &S, unsigned MinColorRoot, const std::vector<int> &Colors, Value *GCFrame);
360 : : void PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State &S, std::map<Value *, std::pair<int, int>>);
361 : : bool CleanupIR(Function &F, State *S, bool *CFGModified);
362 : : void NoteUseChain(State &S, BBState &BBS, User *TheUser);
363 : : SmallVector<int, 1> GetPHIRefinements(PHINode *phi, State &S);
364 : : void FixUpRefinements(ArrayRef<int> PHINumbers, State &S);
365 : : void RefineLiveSet(BitVector &LS, State &S, const std::vector<int> &CalleeRoots);
366 : : Value *EmitTagPtr(IRBuilder<> &builder, Type *T, Value *V);
367 : : Value *EmitLoadTag(IRBuilder<> &builder, Value *V);
368 : : };
369 : :
370 : 9567120 : static unsigned getValueAddrSpace(Value *V) {
371 : 9567120 : return V->getType()->getPointerAddressSpace();
372 : : }
373 : :
374 : 16995100 : static bool isTrackedValue(Value *V) {
375 : 16995100 : PointerType *PT = dyn_cast<PointerType>(V->getType()->getScalarType());
376 [ + - + + ]: 16995100 : return PT && PT->getAddressSpace() == AddressSpace::Tracked;
377 : : }
378 : :
379 : 65441100 : static bool isSpecialPtr(Type *Ty) {
380 : 65441100 : PointerType *PTy = dyn_cast<PointerType>(Ty);
381 [ - + ]: 65441100 : if (!PTy)
382 : 0 : return false;
383 : 65441100 : unsigned AS = PTy->getAddressSpace();
384 [ + + + - ]: 65441100 : return AddressSpace::FirstSpecial <= AS && AS <= AddressSpace::LastSpecial;
385 : : }
386 : :
387 : : // Return how many Special pointers are in T (count > 0),
388 : : // and whether T contains anything else besides Special pointers (all == false if so)
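 : : // For example, for [2 x %jl_value_t addrspace(10)*] this computes count == 2 and
 : : // all == true, while for { i8, %jl_value_t addrspace(10)* } it computes count == 1
 : : // and all == false.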
389 : 40580800 : CountTrackedPointers::CountTrackedPointers(Type *T) {
390 [ + + ]: 40580800 : if (isa<PointerType>(T)) {
391 [ + + ]: 5340690 : if (isSpecialPtr(T)) {
392 : 5302100 : count++;
393 [ + + ]: 5302100 : if (T->getPointerAddressSpace() != AddressSpace::Tracked)
394 : 61362 : derived = true;
395 : : }
396 [ + + + + : 35240100 : } else if (isa<StructType>(T) || isa<ArrayType>(T) || isa<VectorType>(T)) {
+ + + + ]
397 [ + + ]: 17383500 : for (Type *ElT : T->subtypes()) {
398 : 10790900 : auto sub = CountTrackedPointers(ElT);
399 : 10790900 : count += sub.count;
400 : 10790900 : all &= sub.all;
401 : 10790900 : derived |= sub.derived;
402 : : }
403 [ + + ]: 6592650 : if (isa<ArrayType>(T))
404 : 4239450 : count *= cast<ArrayType>(T)->getNumElements();
405 [ + + ]: 2353200 : else if (isa<VectorType>(T)) {
406 : 505242 : ElementCount EC = cast<VectorType>(T)->getElementCount();
407 : 505242 : count *= EC.getKnownMinValue();
408 : : }
409 : : }
410 [ + + ]: 40580800 : if (count == 0)
411 : 32161700 : all = false;
412 : 40580800 : }
413 : :
414 : 694403 : unsigned getCompositeNumElements(Type *T) {
415 [ + + ]: 694403 : if (auto *ST = dyn_cast<StructType>(T))
416 : 301597 : return ST->getNumElements();
417 [ + + ]: 392806 : else if (auto *AT = dyn_cast<ArrayType>(T))
418 : 367807 : return AT->getNumElements();
419 : : else {
420 : 24999 : ElementCount EC = cast<VectorType>(T)->getElementCount();
421 : 24999 : return EC.getKnownMinValue();
422 : : }
423 : : }
424 : :
425 : : // Walk through a Type, and record the element path to every tracked value inside
426 : 2367540 : void TrackCompositeType(Type *T, std::vector<unsigned> &Idxs, std::vector<std::vector<unsigned>> &Numberings) {
427 [ + + ]: 2367540 : if (isa<PointerType>(T)) {
428 [ + - ]: 1021290 : if (T->getPointerAddressSpace() == AddressSpace::Tracked)
429 : 1021290 : Numberings.push_back(Idxs);
430 : : }
431 [ + + + + : 1346250 : else if (isa<StructType>(T) || isa<ArrayType>(T) || isa<VectorType>(T)) {
+ + + + ]
432 : 694403 : unsigned Idx, NumEl = getCompositeNumElements(T);
433 [ + + ]: 2632730 : for (Idx = 0; Idx < NumEl; Idx++) {
434 : 1938330 : Idxs.push_back(Idx);
435 : 1938330 : Type *ElT = GetElementPtrInst::getTypeAtIndex(T, Idx);
436 : 1938330 : TrackCompositeType(ElT, Idxs, Numberings);
437 : 1938330 : Idxs.pop_back();
438 : : }
439 : : }
440 : 2367540 : }
441 : :
442 : 429215 : std::vector<std::vector<unsigned>> TrackCompositeType(Type *T) {
443 : 858430 : std::vector<unsigned> Idxs;
444 : 429215 : std::vector<std::vector<unsigned>> Numberings;
445 : 429215 : TrackCompositeType(T, Idxs, Numberings);
446 : 429215 : return Numberings;
447 : : }
448 : :
449 : :
450 : :
451 : : // Walk through simple expressions until we hit something that requires root numbering.
452 : : // If the input value is a scalar (pointer), we may return a composite value as base
453 : : // in which case the second member of the pair is the index of the value in the vector.
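 : : // For example (hypothetical values), given
 : : //    %d = addrspacecast %jl_value_t addrspace(10)* %x to %jl_value_t addrspace(11)*
 : : //    %p = getelementptr ..., %jl_value_t addrspace(11)* %d, ...
 : : // walking %p steps back through the GEP and the addrspacecast and yields
 : : // {%x, -1}, i.e. the tracked base pointer %x (with no vector index).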
454 : 20262900 : static std::pair<Value*,int> FindBaseValue(const State &S, Value *V, bool UseCache = true) {
455 : 20262900 : Value *CurrentV = V;
456 : 20262900 : int fld_idx = -1;
457 : : while (true) {
458 [ + + ]: 31961500 : if (UseCache) {
459 [ + + ]: 31397300 : if (CurrentV->getType()->isPointerTy()) {
460 : 31021900 : auto it = S.AllPtrNumbering.find(CurrentV);
461 [ + + ]: 31021900 : if (it != S.AllPtrNumbering.end())
462 : 13805200 : return std::make_pair(CurrentV, fld_idx);
463 : : } else {
464 : 375380 : auto it = S.AllCompositeNumbering.find(CurrentV);
465 [ + + ]: 375380 : if (it != S.AllCompositeNumbering.end())
466 : 2188 : return std::make_pair(CurrentV, fld_idx);
467 : : }
468 : : }
469 : : // Note that this is true:
470 : : // assert(fld_idx == -1 ? CurrentV->getType()->isPointerTy() : CurrentV->getType()->isVectorPointerTy());
471 [ + + ]: 18154100 : if (isa<BitCastInst>(CurrentV))
472 : 3942800 : CurrentV = cast<BitCastInst>(CurrentV)->getOperand(0);
473 [ + + ]: 14211300 : else if (isa<AddrSpaceCastInst>(CurrentV)) {
474 : 4283440 : Value *NewV = cast<AddrSpaceCastInst>(CurrentV)->getOperand(0);
475 [ + + ]: 4283440 : if (getValueAddrSpace(NewV) == 0)
476 : 680367 : break;
477 : 3603070 : CurrentV = NewV;
478 [ - + ]: 9927870 : } else if (auto *Freeze = dyn_cast<FreezeInst>(CurrentV)) {
479 : 0 : CurrentV = Freeze->getOperand(0); // Can be formed by optimizations, treat as a no-op
480 [ + + ]: 9927870 : } else if (auto *GEP = dyn_cast<GetElementPtrInst>(CurrentV)) {
481 : 3153270 : CurrentV = GEP->getOperand(0);
482 : : // GEP can make vectors from a single base pointer
483 [ + + - + : 3153270 : if (fld_idx != -1 && !isa<VectorType>(CurrentV->getType())) {
- + ]
484 : 0 : fld_idx = -1;
485 : : }
486 : : }
487 [ + + ]: 6774600 : else if (auto EEI = dyn_cast<ExtractElementInst>(CurrentV)) {
488 [ + - + - ]: 2466 : assert(CurrentV->getType()->isPointerTy() && fld_idx == -1);
489 : : // TODO: For now, only support constant index.
490 : 2466 : auto IdxOp = cast<ConstantInt>(EEI->getIndexOperand());
491 : 2466 : fld_idx = IdxOp->getLimitedValue(INT_MAX);
492 : 2466 : CurrentV = EEI->getVectorOperand();
493 : : }
494 [ + + ]: 6772130 : else if (auto LI = dyn_cast<LoadInst>(CurrentV)) {
495 [ + + ]: 2943500 : if (auto PtrT = dyn_cast<PointerType>(LI->getType()->getScalarType())) {
496 [ + + ]: 2915090 : if (PtrT->getAddressSpace() == AddressSpace::Loaded) {
497 : 996920 : CurrentV = LI->getPointerOperand();
498 : 996920 : fld_idx = -1;
499 [ - + ]: 996920 : if (!isSpecialPtr(CurrentV->getType())) {
500 : : // This could really be anything, but it's not loaded
501 : : // from a tracked pointer, so it doesn't matter what
502 : : // it is--just pick something simple.
503 : 0 : CurrentV = ConstantPointerNull::get(Type::getInt8PtrTy(V->getContext()));
504 : : }
505 : 996920 : continue;
506 : : }
507 : : }
508 : : // In general a load terminates a walk
509 : 1946580 : break;
510 : : }
511 [ + + ]: 3828630 : else if (auto LI = dyn_cast<AtomicCmpXchgInst>(CurrentV)) {
512 : : // In general a load terminates a walk
513 : : (void)LI;
514 : 5879 : break;
515 : : }
516 [ - + ]: 3822750 : else if (auto LI = dyn_cast<AtomicRMWInst>(CurrentV)) {
517 : : // In general a load terminates a walk
518 : : (void)LI;
519 : 0 : break;
520 : : }
521 [ + + ]: 3822750 : else if (auto II = dyn_cast<IntrinsicInst>(CurrentV)) {
522 : : // Some intrinsics behave like LoadInst followed by a SelectInst
523 : : // This should never happen in a derived addrspace (since those cannot be stored to memory)
524 : : // so we don't need to lift these operations, but we do need to check if it's loaded and continue walking the base pointer
525 [ + - + - : 24 : if (II->getIntrinsicID() == Intrinsic::masked_load ||
+ - ]
526 : 12 : II->getIntrinsicID() == Intrinsic::masked_gather) {
527 [ + - ]: 12 : if (auto VTy = dyn_cast<VectorType>(II->getType())) {
528 [ + - ]: 12 : if (auto PtrT = dyn_cast<PointerType>(VTy->getElementType())) {
529 [ - + ]: 12 : if (PtrT->getAddressSpace() == AddressSpace::Loaded) {
530 : 0 : Value *Mask = II->getOperand(2);
531 : 0 : Value *Passthrough = II->getOperand(3);
532 [ # # # # : 0 : if (!isa<Constant>(Mask) || !cast<Constant>(Mask)->isAllOnesValue()) {
# # ]
533 [ # # ]: 0 : assert(isa<UndefValue>(Passthrough) && "unimplemented");
534 : : (void)Passthrough;
535 : : }
536 : 0 : CurrentV = II->getOperand(0);
537 [ # # ]: 0 : if (II->getIntrinsicID() == Intrinsic::masked_load) {
538 : 0 : fld_idx = -1;
539 [ # # ]: 0 : if (!isSpecialPtr(CurrentV->getType())) {
540 : 0 : CurrentV = ConstantPointerNull::get(Type::getInt8PtrTy(V->getContext()));
541 : : }
542 : : } else {
543 [ # # ]: 0 : if (auto VTy2 = dyn_cast<VectorType>(CurrentV->getType())) {
544 [ # # ]: 0 : if (!isSpecialPtr(VTy2->getElementType())) {
545 : 0 : CurrentV = ConstantPointerNull::get(Type::getInt8PtrTy(V->getContext()));
546 : 0 : fld_idx = -1;
547 : : }
548 : : }
549 : : }
550 : 0 : continue;
551 : : }
552 : : }
553 : : }
554 : : // In general a load terminates a walk
555 : 12 : break;
556 : : }
557 : : }
558 : : else {
559 : 3822740 : break;
560 : : }
561 : 11698500 : }
562 [ + + + + : 6455580 : assert(isa<LoadInst>(CurrentV) || isa<CallInst>(CurrentV) ||
+ + + - +
+ + + + +
+ + + + +
- + + + +
+ + - + ]
563 : : isa<AtomicCmpXchgInst>(CurrentV) || isa<AtomicRMWInst>(CurrentV) ||
564 : : isa<Argument>(CurrentV) || isa<SelectInst>(CurrentV) ||
565 : : isa<PHINode>(CurrentV) || isa<AddrSpaceCastInst>(CurrentV) ||
566 : : isa<Constant>(CurrentV) || isa<AllocaInst>(CurrentV) ||
567 : : isa<InsertValueInst>(CurrentV) ||
568 : : isa<ExtractValueInst>(CurrentV) ||
569 : : isa<InsertElementInst>(CurrentV) ||
570 : : isa<ShuffleVectorInst>(CurrentV));
571 : 6455580 : return std::make_pair(CurrentV, fld_idx);
572 : : }
573 : :
574 : 2149640 : Value *LateLowerGCFrame::MaybeExtractScalar(State &S, std::pair<Value*,int> ValExpr, Instruction *InsertBefore) {
575 : 2149640 : Value *V = ValExpr.first;
576 [ + + ]: 2149640 : if (isa<PointerType>(V->getType())) {
577 [ - + ]: 2036930 : assert(ValExpr.second == -1);
578 [ + + ]: 2036930 : if (!isTrackedValue(V)) {
579 : 29872 : int BaseNumber = NumberBase(S, V);
580 [ + + ]: 29872 : if (BaseNumber >= 0)
581 : 9968 : V = GetPtrForNumber(S, BaseNumber, InsertBefore);
582 : : else
583 : 19904 : V = ConstantPointerNull::get(cast<PointerType>(T_prjlvalue));
584 : : }
585 : : }
586 [ + - ]: 112703 : else if (ValExpr.second != -1) {
587 : 112703 : auto Tracked = TrackCompositeType(V->getType());
588 : 112703 : auto Idxs = makeArrayRef(Tracked.at(ValExpr.second));
589 : 112703 : auto IdxsNotVec = Idxs.slice(0, Idxs.size() - 1);
590 : 112703 : Type *FinalT = ExtractValueInst::getIndexedType(V->getType(), IdxsNotVec);
591 : 112703 : bool IsVector = isa<VectorType>(FinalT);
592 : 112703 : PointerType *T = cast<PointerType>(
593 : 112703 : GetElementPtrInst::getTypeAtIndex(FinalT, Idxs.back()));
594 [ - + ]: 112703 : if (T->getAddressSpace() != AddressSpace::Tracked) {
595 : : // if V isn't tracked, get the shadow def
596 : 0 : auto Numbers = NumberAllBase(S, V);
597 : 0 : int BaseNumber = Numbers.at(ValExpr.second);
598 [ # # ]: 0 : if (BaseNumber >= 0)
599 : 0 : V = GetPtrForNumber(S, BaseNumber, InsertBefore);
600 : : else
601 : 0 : V = ConstantPointerNull::get(cast<PointerType>(T_prjlvalue));
602 : 0 : return V;
603 : : }
604 [ + + ]: 112703 : if (Idxs.size() > IsVector)
605 [ - + ]: 87708 : V = ExtractValueInst::Create(V, IsVector ? IdxsNotVec : Idxs, "", InsertBefore);
606 [ + + ]: 112703 : if (IsVector)
607 : 49990 : V = ExtractElementInst::Create(V,
608 : 24995 : ConstantInt::get(Type::getInt32Ty(V->getContext()), Idxs.back()),
609 : : "", InsertBefore);
610 : : }
611 : 2149640 : return V;
612 : : }
613 : :
614 : 0 : std::vector<Value*> LateLowerGCFrame::MaybeExtractVector(State &S, Value *BaseVec, Instruction *InsertBefore) {
615 : 0 : auto Numbers = NumberAllBase(S, BaseVec);
616 : 0 : std::vector<Value*> V{Numbers.size()};
617 : 0 : Value *V_rnull = ConstantPointerNull::get(cast<PointerType>(T_prjlvalue));
618 [ # # ]: 0 : for (unsigned i = 0; i < V.size(); ++i) {
619 [ # # ]: 0 : if (Numbers[i] >= 0) // ignores undef and poison values
620 : 0 : V[i] = GetPtrForNumber(S, Numbers[i], InsertBefore);
621 : : else
622 : 0 : V[i] = V_rnull;
623 : : }
624 : 0 : return V;
625 : : }
626 : :
627 : 1887120 : Value *LateLowerGCFrame::GetPtrForNumber(State &S, unsigned Num, Instruction *InsertBefore)
628 : : {
629 : 1887120 : Value *Val = S.ReversePtrNumbering[Num];
630 : 1887120 : unsigned Idx = -1;
631 [ + + ]: 1887120 : if (!isa<PointerType>(Val->getType())) {
632 : 85493 : const std::vector<int> &AllNums = S.AllCompositeNumbering[Val];
633 [ + - ]: 148382 : for (Idx = 0; Idx < AllNums.size(); ++Idx) {
634 [ + + ]: 148382 : if ((unsigned)AllNums[Idx] == Num)
635 : 85493 : break;
636 : : }
637 [ - + ]: 85493 : assert(Idx < AllNums.size());
638 : : }
639 : 1887120 : return MaybeExtractScalar(S, std::make_pair(Val, Idx), InsertBefore);
640 : : }
641 : :
642 : 23523 : void LateLowerGCFrame::LiftSelect(State &S, SelectInst *SI) {
643 [ + - ]: 23523 : if (isa<PointerType>(SI->getType()) ?
644 : 23523 : S.AllPtrNumbering.count(SI) :
645 [ + + ]: 23523 : S.AllCompositeNumbering.count(SI)) {
646 : : // already visited here--nothing to do
647 : 4998 : return;
648 : : }
649 : 18525 : std::vector<int> Numbers;
650 : 18525 : unsigned NumRoots = 1;
651 [ - + ]: 18525 : if (auto VTy = dyn_cast<VectorType>(SI->getType())) {
652 : 0 : ElementCount EC = VTy->getElementCount();
653 : 0 : Numbers.resize(EC.getKnownMinValue(), -1);
654 : : }
655 : : else
656 [ + - ]: 18525 : assert(isa<PointerType>(SI->getType()) && "unimplemented");
657 [ - + ]: 18525 : assert(!isTrackedValue(SI));
658 : : // find the base root for the arguments
659 : 18525 : Value *TrueBase = MaybeExtractScalar(S, FindBaseValue(S, SI->getTrueValue(), false), SI);
660 : 18525 : Value *FalseBase = MaybeExtractScalar(S, FindBaseValue(S, SI->getFalseValue(), false), SI);
661 : 18525 : std::vector<Value*> TrueBases;
662 : 18525 : std::vector<Value*> FalseBases;
663 [ - + ]: 18525 : if (!isa<PointerType>(TrueBase->getType())) {
664 : 0 : TrueBases = MaybeExtractVector(S, TrueBase, SI);
665 [ # # ]: 0 : assert(TrueBases.size() == Numbers.size());
666 : 0 : NumRoots = TrueBases.size();
667 : : }
668 [ - + ]: 18525 : if (!isa<PointerType>(FalseBase->getType())) {
669 : 0 : FalseBases = MaybeExtractVector(S, FalseBase, SI);
670 [ # # ]: 0 : assert(FalseBases.size() == Numbers.size());
671 : 0 : NumRoots = FalseBases.size();
672 : : }
673 [ + - ]: 18525 : if (isa<PointerType>(SI->getType()) ?
674 : 18525 : S.AllPtrNumbering.count(SI) :
675 [ - + ]: 18525 : S.AllCompositeNumbering.count(SI)) {
676 : : // MaybeExtractScalar or MaybeExtractVector handled this for us (recursively, through a PHINode)
677 : 0 : return;
678 : : }
679 : : // need to handle each element (may just be one scalar)
680 [ + + ]: 37050 : for (unsigned i = 0; i < NumRoots; ++i) {
681 : : Value *TrueElem;
682 [ + - ]: 18525 : if (isa<PointerType>(TrueBase->getType()))
683 : 18525 : TrueElem = TrueBase;
684 : : else
685 : 0 : TrueElem = TrueBases[i];
686 : : Value *FalseElem;
687 [ + - ]: 18525 : if (isa<PointerType>(FalseBase->getType()))
688 : 18525 : FalseElem = FalseBase;
689 : : else
690 : 0 : FalseElem = FalseBases[i];
691 : 18525 : Value *Cond = SI->getCondition();
692 [ - + ]: 18525 : if (isa<VectorType>(Cond->getType())) {
693 : 0 : Cond = ExtractElementInst::Create(Cond,
694 : 0 : ConstantInt::get(Type::getInt32Ty(Cond->getContext()), i),
695 : : "", SI);
696 : : }
697 [ - + ]: 18525 : if (FalseElem->getType() != TrueElem->getType())
698 : 0 : FalseElem = new BitCastInst(FalseElem, TrueElem->getType(), "", SI);
699 : 18525 : SelectInst *SelectBase = SelectInst::Create(Cond, TrueElem, FalseElem, "gclift", SI);
700 : 18525 : int Number = ++S.MaxPtrNumber;
701 : 18525 : S.AllPtrNumbering[SelectBase] = Number;
702 : 18525 : S.ReversePtrNumbering[Number] = SelectBase;
703 [ + - ]: 18525 : if (isa<PointerType>(SI->getType()))
704 : 18525 : S.AllPtrNumbering[SI] = Number;
705 : : else
706 : 0 : Numbers[i] = Number;
707 : : }
708 [ - + ]: 18525 : if (auto VTy = dyn_cast<FixedVectorType>(SI->getType())) {
709 [ # # ]: 0 : if (NumRoots != Numbers.size()) {
710 : : // broadcast the scalar root number to fill the vector
711 [ # # ]: 0 : assert(NumRoots == 1);
712 : 0 : int Number = Numbers[0];
713 : 0 : Numbers.resize(0);
714 : 0 : ElementCount EC = VTy->getElementCount();
715 : 0 : Numbers.resize(EC.getKnownMinValue(), Number);
716 : : }
717 : : }
718 [ - + ]: 18525 : if (!isa<PointerType>(SI->getType()))
719 : 0 : S.AllCompositeNumbering[SI] = Numbers;
720 : : }
721 : :
722 : 76673 : void LateLowerGCFrame::LiftPhi(State &S, PHINode *Phi) {
723 [ + - ]: 76673 : if (isa<PointerType>(Phi->getType()) ?
724 : 76673 : S.AllPtrNumbering.count(Phi) :
725 [ + + ]: 76673 : S.AllCompositeNumbering.count(Phi))
726 : 35162 : return;
727 : : // need to handle each element (may just be one scalar)
728 : 83022 : SmallVector<PHINode *, 2> lifted;
729 : 83022 : std::vector<int> Numbers;
730 : 41511 : unsigned NumRoots = 1;
731 [ - + ]: 41511 : if (auto VTy = dyn_cast<FixedVectorType>(Phi->getType())) {
732 : 0 : NumRoots = VTy->getNumElements();
733 : 0 : Numbers.resize(NumRoots);
734 : : }
735 : : else {
736 : : // TODO: SVE
737 [ + - ]: 41511 : assert(isa<PointerType>(Phi->getType()) && "unimplemented");
738 : : }
739 [ + + ]: 83022 : for (unsigned i = 0; i < NumRoots; ++i) {
740 : 41511 : PHINode *lift = PHINode::Create(T_prjlvalue, Phi->getNumIncomingValues(), "gclift", Phi);
741 : 41511 : int Number = ++S.MaxPtrNumber;
742 : 41511 : S.AllPtrNumbering[lift] = Number;
743 : 41511 : S.ReversePtrNumbering[Number] = lift;
744 [ + - ]: 41511 : if (!isa<VectorType>(Phi->getType()))
745 : 41511 : S.AllPtrNumbering[Phi] = Number;
746 : : else
747 : 0 : Numbers[i] = Number;
748 : 41511 : lifted.push_back(lift);
749 : : }
750 [ - + ]: 41511 : if (!isa<PointerType>(Phi->getType()))
751 : 0 : S.AllCompositeNumbering[Phi] = Numbers;
752 [ + + ]: 155950 : for (unsigned i = 0; i < Phi->getNumIncomingValues(); ++i) {
753 : 114439 : Value *Incoming = Phi->getIncomingValue(i);
754 : 114439 : BasicBlock *IncomingBB = Phi->getIncomingBlock(i);
755 : 114439 : Instruction *Terminator = IncomingBB->getTerminator();
756 : 114439 : Value *Base = MaybeExtractScalar(S, FindBaseValue(S, Incoming, false), Terminator);
757 : 228878 : std::vector<Value*> IncomingBases;
758 [ - + ]: 114439 : if (!isa<PointerType>(Base->getType())) {
759 : 0 : IncomingBases = MaybeExtractVector(S, Base, Terminator);
760 [ # # ]: 0 : assert(IncomingBases.size() == NumRoots);
761 : : }
762 [ + + ]: 228878 : for (unsigned i = 0; i < NumRoots; ++i) {
763 : 114439 : PHINode *lift = lifted[i];
764 : : Value *BaseElem;
765 [ + - ]: 114439 : if (isa<PointerType>(Base->getType()))
766 : 114439 : BaseElem = Base;
767 : : else
768 : 0 : BaseElem = IncomingBases[i];
769 [ + + ]: 114439 : if (BaseElem->getType() != T_prjlvalue)
770 : 15905 : BaseElem = new BitCastInst(BaseElem, T_prjlvalue, "", Terminator);
771 : 114439 : lift->addIncoming(BaseElem, IncomingBB);
772 : : }
773 : : }
774 : : }
775 : :
776 : 19766100 : int LateLowerGCFrame::NumberBase(State &S, Value *CurrentV)
777 : : {
778 : 19766100 : auto it = S.AllPtrNumbering.find(CurrentV);
779 [ + + ]: 19766100 : if (it != S.AllPtrNumbering.end())
780 : 13814900 : return it->second;
781 : : int Number;
782 [ + + ]: 5951180 : if (isa<Constant>(CurrentV)) {
783 : : // Perm rooted
784 : 103073 : Number = -2;
785 [ + + + - : 11829000 : } else if (isa<Argument>(CurrentV) || isa<AllocaInst>(CurrentV) ||
+ + + + ]
786 [ + + ]: 5980920 : (isa<AddrSpaceCastInst>(CurrentV) && !isTrackedValue(CurrentV))) {
787 : : // We know this is rooted in the parent
788 : : // future note: we could chose to exclude argument of type CalleeRooted here
789 : 982464 : Number = -1;
790 [ - + ]: 4865650 : } else if (!isSpecialPtr(CurrentV->getType())) {
791 : : // Externally rooted somehow hopefully (otherwise there's a bug in the
792 : : // input IR)
793 : 0 : Number = -1;
794 [ + + + + : 4865650 : } else if (isa<SelectInst>(CurrentV) && !isTrackedValue(CurrentV)) {
+ + ]
795 : 4998 : LiftSelect(S, cast<SelectInst>(CurrentV));
796 : 4998 : Number = S.AllPtrNumbering.at(CurrentV);
797 : 4998 : return Number;
798 [ + + + + : 4860650 : } else if (isa<PHINode>(CurrentV) && !isTrackedValue(CurrentV)) {
+ + ]
799 : 35162 : LiftPhi(S, cast<PHINode>(CurrentV));
800 : 35162 : Number = S.AllPtrNumbering.at(CurrentV);
801 : 35162 : return Number;
802 [ + + ]: 4825490 : } else if (isa<ExtractValueInst>(CurrentV)) {
803 : 115767 : auto Numbers = NumberAllBase(S, CurrentV);
804 [ - + ]: 115767 : assert(Numbers.size() == 1);
805 : 115767 : Number = Numbers[0];
806 : : } else {
807 [ + - + - ]: 4709720 : assert((CurrentV->getType()->isPointerTy() && isTrackedValue(CurrentV)));
808 : 4709720 : Number = ++S.MaxPtrNumber;
809 : 4709720 : S.ReversePtrNumbering[Number] = CurrentV;
810 : : }
811 : 5911020 : S.AllPtrNumbering[CurrentV] = Number;
812 : 5911020 : return Number;
813 : : }
814 : :
815 : 19695400 : int LateLowerGCFrame::Number(State &S, Value *V) {
816 [ - + ]: 19695400 : assert(isSpecialPtr(V->getType()));
817 : 19695400 : auto CurrentV = FindBaseValue(S, V);
818 : : int Number;
819 [ + + ]: 19695400 : if (CurrentV.second == -1) {
820 : 19693200 : Number = NumberBase(S, CurrentV.first);
821 : : } else {
822 : 2254 : auto Numbers = NumberAllBase(S, CurrentV.first);
823 : 2254 : Number = Numbers.at(CurrentV.second);
824 : : }
825 [ + + ]: 19695400 : if (V != CurrentV.first)
826 : 5343400 : S.AllPtrNumbering[V] = Number;
827 : 19695400 : return Number;
828 : : }
829 : :
830 : : // assign pointer numbers to a def instruction
831 : 490956 : std::vector<int> LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) {
832 [ + + ]: 490956 : if (isa<PointerType>(CurrentV->getType())) {
833 : 115767 : auto it = S.AllPtrNumbering.find(CurrentV);
834 [ - + ]: 115767 : if (it != S.AllPtrNumbering.end())
835 : 0 : return std::vector<int>({it->second});
836 : : } else {
837 : 375189 : auto it = S.AllCompositeNumbering.find(CurrentV);
838 [ + + ]: 375189 : if (it != S.AllCompositeNumbering.end())
839 : 2188 : return it->second;
840 : : }
841 : :
842 : 977536 : std::vector<int> Numbers;
843 : 488768 : auto tracked = CountTrackedPointers(CurrentV->getType());
844 [ - + ]: 488768 : if (tracked.count == 0)
845 : 0 : return Numbers;
846 [ + + + - : 924787 : if (isa<Constant>(CurrentV) || isa<AllocaInst>(CurrentV) || isa<Argument>(CurrentV) ||
+ - - + +
+ ]
847 [ - - ]: 436019 : (isa<AddrSpaceCastInst>(CurrentV) && !isTrackedValue(CurrentV))) {
848 : 52749 : Numbers.resize(tracked.count, -1);
849 : : }
850 [ + + ]: 436019 : else if (auto *SVI = dyn_cast<ShuffleVectorInst>(CurrentV)) {
851 : 1250 : std::vector<int> Numbers1 = NumberAll(S, SVI->getOperand(0));
852 : 1250 : std::vector<int> Numbers2 = NumberAll(S, SVI->getOperand(1));
853 : 625 : auto Mask = SVI->getShuffleMask();
854 [ + + ]: 3125 : for (auto idx : Mask) {
855 [ + + ]: 2500 : if (idx == -1) {
856 : 922 : Numbers.push_back(-1);
857 [ + + ]: 1578 : } else if ((unsigned)idx < Numbers1.size()) {
858 : 1306 : Numbers.push_back(Numbers1.at(idx));
859 : : } else {
860 : 272 : Numbers.push_back(Numbers2.at(idx - Numbers1.size()));
861 : : }
862 : : }
863 [ + + ]: 435394 : } else if (auto *IEI = dyn_cast<InsertElementInst>(CurrentV)) {
864 : : // TODO: handle non-constant: LiftInsertElement(S, IEI)
865 : 1345 : unsigned idx = cast<ConstantInt>(IEI->getOperand(2))->getZExtValue();
866 : 1345 : Numbers = NumberAll(S, IEI->getOperand(0));
867 : 1345 : int ElNumber = Number(S, IEI->getOperand(1));
868 : 1345 : Numbers[idx] = ElNumber;
869 [ + + ]: 434049 : } else if (auto *IVI = dyn_cast<InsertValueInst>(CurrentV)) {
870 : 146708 : Numbers = NumberAll(S, IVI->getAggregateOperand());
871 : 293416 : auto Tracked = TrackCompositeType(IVI->getType());
872 [ - + ]: 146708 : assert(Tracked.size() == Numbers.size());
873 : 293416 : std::vector<int> InsertNumbers = NumberAll(S, IVI->getInsertedValueOperand());
874 : 146708 : auto Idxs = IVI->getIndices();
875 : 146708 : unsigned j = 0;
876 [ + + ]: 529544 : for (unsigned i = 0; i < Tracked.size(); ++i) {
877 : 382836 : auto Elem = makeArrayRef(Tracked[i]);
878 [ + + ]: 382836 : if (Elem.size() < Idxs.size())
879 : 8904 : continue;
880 [ + + ]: 373932 : if (Idxs.equals(Elem.slice(0, Idxs.size()))) // Tracked.startswith(Idxs)
881 : 107449 : Numbers[i] = InsertNumbers[j++];
882 : : }
883 [ - + ]: 146708 : assert(j == InsertNumbers.size());
884 [ + + ]: 287341 : } else if (auto *EVI = dyn_cast<ExtractValueInst>(CurrentV)) {
885 : 242948 : auto BaseNumbers = NumberAll(S, EVI->getAggregateOperand());
886 : 121474 : auto Tracked = TrackCompositeType(EVI->getAggregateOperand()->getType());
887 [ - + ]: 121474 : assert(Tracked.size() == BaseNumbers.size());
888 : 121474 : auto Idxs = EVI->getIndices();
889 [ + + ]: 372642 : for (unsigned i = 0; i < Tracked.size(); ++i) {
890 : 251168 : auto Elem = makeArrayRef(Tracked[i]);
891 [ + + ]: 251168 : if (Elem.size() < Idxs.size())
892 : 7360 : continue;
893 [ + + ]: 243808 : if (Idxs.equals(Elem.slice(0, Idxs.size()))) // Tracked.startswith(Idxs)
894 : 130674 : Numbers.push_back(BaseNumbers[i]);
895 : : }
896 [ - + ]: 121474 : assert(CountTrackedPointers(EVI->getType()).count == Numbers.size());
897 [ - + ]: 165867 : } else if (tracked.derived) {
898 [ # # ]: 0 : if (isa<SelectInst>(CurrentV)) {
899 : 0 : LiftSelect(S, cast<SelectInst>(CurrentV));
900 [ # # ]: 0 : } else if (isa<PHINode>(CurrentV)) {
901 : 0 : LiftPhi(S, cast<PHINode>(CurrentV));
902 : : // } else if (isa<ExtractElementInst>(CurrentV)) { // TODO: lifting for non constant index
903 : : } else {
904 : 0 : CurrentV->print(errs());
905 : 0 : llvm_unreachable("Unexpected generating operation for derived values");
906 : : }
907 [ # # ]: 0 : if (isa<PointerType>(CurrentV->getType())) {
908 : 0 : auto Number = S.AllPtrNumbering.at(CurrentV);
909 : 0 : Numbers.resize(1, Number);
910 : : } else {
911 : 0 : Numbers = S.AllCompositeNumbering.at(CurrentV);
912 : : }
913 : : } else {
914 [ + + + + : 165867 : assert((isa<LoadInst>(CurrentV) || isa<CallInst>(CurrentV) || isa<PHINode>(CurrentV) || isa<SelectInst>(CurrentV) ||
+ + + + -
+ - - ]
915 : : isa<AtomicCmpXchgInst>(CurrentV) || isa<AtomicRMWInst>(CurrentV))
916 : : && "unexpected def expression");
917 : : // This is simple, we can just number them sequentially
918 [ + + ]: 451114 : for (unsigned i = 0; i < tracked.count; ++i) {
919 : 285247 : int Num = ++S.MaxPtrNumber;
920 : 285247 : Numbers.push_back(Num);
921 : 285247 : S.ReversePtrNumbering[Num] = CurrentV;
922 : : }
923 : : }
924 [ + + ]: 488768 : if (isa<PointerType>(CurrentV->getType())) {
925 [ - + ]: 115767 : assert(Numbers.size() == 1);
926 : 115767 : S.AllPtrNumbering[CurrentV] = Numbers[0];
927 : : } else {
928 : 373001 : S.AllCompositeNumbering[CurrentV] = Numbers;
929 : : }
930 : 488768 : return Numbers;
931 : : }
932 : :
933 : : // gets the pointer number for every gc tracked value inside V
934 : 11601300 : std::vector<int> LateLowerGCFrame::NumberAll(State &S, Value *V) {
935 [ + + ]: 11601300 : if (isa<PointerType>(V->getType())) {
936 : 91450 : auto it = S.AllPtrNumbering.find(V);
937 [ + + ]: 91450 : if (it != S.AllPtrNumbering.end())
938 : 48200 : return std::vector<int>({it->second});
939 : : } else {
940 : 11509800 : auto it = S.AllCompositeNumbering.find(V);
941 [ + + ]: 11509800 : if (it != S.AllCompositeNumbering.end())
942 : 264787 : return it->second;
943 : : }
944 : 22576500 : std::vector<int> Numbers;
945 : 11288300 : auto tracked = CountTrackedPointers(V->getType());
946 [ + + ]: 11288300 : if (tracked.count == 0)
947 : 10872200 : return Numbers;
948 : 416039 : auto CurrentV = FindBaseValue(S, V);
949 : 416039 : int Number = -1;
950 [ + + ]: 416039 : if (isa<PointerType>(CurrentV.first->getType())) {
951 : : // Base turned out to be a single pointer--number it
952 [ - + ]: 43104 : assert(CurrentV.second == -1);
953 : 43104 : Number = NumberBase(S, CurrentV.first);
954 : 43104 : Numbers.resize(tracked.count, Number);
955 : : } else {
956 : : // Base turned out to be an aggregate--get all numbers for it, then sub-select
957 : 372935 : Numbers = NumberAllBase(S, CurrentV.first);
958 [ + + ]: 372935 : if (CurrentV.second != -1) {
959 : 212 : Number = Numbers[CurrentV.second]; // only needed a subset of the values
960 : 212 : Numbers.resize(tracked.count, Number);
961 : : }
962 : : else {
963 [ - + ]: 372723 : assert(!isa<PointerType>(V->getType()));
964 : : }
965 : : }
966 [ + + ]: 416039 : if (CurrentV.first != V) {
967 [ + + ]: 1098 : if (isa<PointerType>(V->getType())) {
968 : 983 : S.AllPtrNumbering[V] = Number;
969 : : } else {
970 : 115 : S.AllCompositeNumbering[V] = Numbers;
971 : : }
972 : : }
973 : 416039 : return Numbers;
974 : : }
975 : :
976 : :
977 : 34745400 : static void MaybeResize(BBState &BBS, unsigned Idx) {
978 [ + + ]: 34745400 : if (BBS.Defs.size() <= Idx) {
979 : 22016400 : BBS.Defs.resize(Idx + 1);
980 : 22016400 : BBS.UpExposedUses.resize(Idx + 1);
981 : 22016400 : BBS.PhiOuts.resize(Idx + 1);
982 : : }
983 : 34745400 : }
984 : :
985 : 805273000 : static bool HasBitSet(const BitVector &BV, unsigned Bit) {
986 [ + + + + ]: 805273000 : return Bit < BV.size() && BV[Bit];
987 : : }
988 : :
989 : 5055000 : static void NoteDef(State &S, BBState &BBS, int Num, const std::vector<int> &SafepointsSoFar) {
990 [ - + ]: 5055000 : assert(Num >= 0);
991 : 5055000 : MaybeResize(BBS, Num);
992 [ + - ]: 5055000 : assert(BBS.Defs[Num] == 0 && "SSA Violation or misnumbering?");
993 : 5055000 : BBS.Defs[Num] = 1;
994 : 5055000 : BBS.UpExposedUses[Num] = 0;
995 : : // This value could potentially be live at any following safe point
996 : : // if it ends up live out, so add it to the LiveIfLiveOut lists for all
997 : : // following safepoints.
998 [ + + ]: 11465700 : for (int Safepoint : SafepointsSoFar) {
999 : 6410710 : S.LiveIfLiveOut[Safepoint].push_back(Num);
1000 : : }
1001 : 5055000 : }
1002 : :
1003 : 13399900 : void LateLowerGCFrame::MaybeNoteDef(State &S, BBState &BBS, Value *Def, const std::vector<int> &SafepointsSoFar, SmallVector<int, 1> &&RefinedPtr) {
1004 : 13399900 : Type *RT = Def->getType();
1005 [ + + ]: 13399900 : if (isa<PointerType>(RT)) {
1006 [ + + ]: 6935870 : if (!isSpecialPtr(RT))
1007 : 2166110 : return;
1008 [ + - ]: 4769760 : assert(isTrackedValue(Def) && "Returned value of GC interest, but not tracked?");
1009 : 4769760 : int Num = Number(S, Def);
1010 : 4769760 : NoteDef(S, BBS, Num, SafepointsSoFar);
1011 [ + + ]: 4769760 : if (!RefinedPtr.empty())
1012 : 1974860 : S.Refinements[Num] = std::move(RefinedPtr);
1013 : : }
1014 : : else {
1015 : 12928100 : std::vector<int> Nums = NumberAll(S, Def);
1016 [ + + ]: 6749290 : for (int Num : Nums) {
1017 : 285247 : NoteDef(S, BBS, Num, SafepointsSoFar);
1018 [ + + ]: 285247 : if (!RefinedPtr.empty())
1019 : 25718 : S.Refinements[Num] = RefinedPtr;
1020 : : }
1021 : : }
1022 : : }
1023 : :
1024 : 4213610 : static int NoteSafepoint(State &S, BBState &BBS, CallInst *CI, std::vector<int> CalleeRoots) {
1025 : 4213610 : int Number = ++S.MaxSafepointNumber;
1026 : 4213610 : S.SafepointNumbering[CI] = Number;
1027 : 4213610 : S.ReverseSafepointNumbering.push_back(CI);
1028 : : // Note which pointers are upward exposed live here. They need to be
1029 : : // considered live at this safepoint even when they have a def earlier
1030 : : // in this BB (i.e. even when they don't participate in the dataflow
1031 : : // computation)
1032 : 4213610 : S.LiveSets.push_back(BBS.UpExposedUses);
1033 : 4213610 : S.LiveIfLiveOut.push_back(std::vector<int>{});
1034 : 4213610 : S.CalleeRoots.push_back(std::move(CalleeRoots));
1035 : 4213610 : return Number;
1036 : : }
1037 : :
1038 : 36711500 : void LateLowerGCFrame::NoteUse(State &S, BBState &BBS, Value *V, BitVector &Uses) {
1039 : : // Short circuit to avoid having to deal with vectors of constants, etc.
1040 [ + + ]: 36711500 : if (isa<Constant>(V))
1041 : 13977300 : return;
1042 [ + + ]: 22734100 : if (isa<PointerType>(V->getType())) {
1043 [ + + ]: 18048900 : if (isSpecialPtr(V->getType())) {
1044 : 11513500 : int Num = Number(S, V);
1045 [ + + ]: 11513500 : if (Num < 0)
1046 : 2860150 : return;
1047 : 8653360 : MaybeResize(BBS, Num);
1048 : 8653360 : Uses[Num] = 1;
1049 : : }
1050 : : } else {
1051 : 9370440 : std::vector<int> Nums = NumberAll(S, V);
1052 [ + + ]: 4966060 : for (int Num : Nums) {
1053 [ + + ]: 280839 : if (Num < 0)
1054 : 12795 : continue;
1055 : 268044 : MaybeResize(BBS, Num);
1056 : 268044 : Uses[Num] = 1;
1057 : : }
1058 : : }
1059 : : }
1060 : :
1061 : 18349200 : void LateLowerGCFrame::NoteOperandUses(State &S, BBState &BBS, User &UI) {
1062 [ + + ]: 53898000 : for (Use &U : UI.operands()) {
1063 : 35548800 : NoteUse(S, BBS, U);
1064 : : }
1065 : 18349200 : }
1066 : :
1067 : : template <typename VisitInst, typename callback>
1068 : 565611 : void RecursivelyVisit(callback f, Value *V) {
1069 [ + + ]: 1738780 : for (Use &VU : V->uses()) {
1070 : 1173170 : User *TheUser = VU.getUser();
1071 [ + + ]: 1173170 : if (isa<VisitInst>(TheUser))
1072 : 22790 : f(VU);
1073 [ + + + - ]: 2747000 : if (isa<CallInst>(TheUser) || isa<LoadInst>(TheUser) ||
1074 [ + + + + ]: 1878270 : isa<SelectInst>(TheUser) || isa<PHINode>(TheUser) || // TODO: should these be removed from this list?
1075 [ + - + + ]: 1071200 : isa<StoreInst>(TheUser) || isa<PtrToIntInst>(TheUser) ||
1076 [ + - ]: 444670 : isa<ICmpInst>(TheUser) || // ICmpEQ/ICmpNE can be used with ptr types
1077 [ + + - + : 2342930 : isa<AtomicCmpXchgInst>(TheUser) || isa<AtomicRMWInst>(TheUser))
+ + ]
1078 : 951107 : continue;
1079 [ + + + + : 222065 : if (isa<GetElementPtrInst>(TheUser) || isa<BitCastInst>(TheUser) || isa<AddrSpaceCastInst>(TheUser)) {
+ - + - ]
1080 : 222065 : RecursivelyVisit<VisitInst, callback>(f, TheUser);
1081 : 222065 : continue;
1082 : : }
1083 : 0 : llvm_dump(V);
1084 : 0 : llvm_dump(TheUser);
1085 : 0 : assert(false && "Unexpected instruction");
1086 : : }
1087 : 565611 : }
1088 : :
1089 : 0 : static void dumpBitVectorValues(State &S, BitVector &BV) {
1090 : 0 : bool first = true;
1091 [ # # ]: 0 : for (int Idx = BV.find_first(); Idx >= 0; Idx = BV.find_next(Idx)) {
1092 [ # # ]: 0 : if (!first)
1093 : 0 : dbgs() << ", ";
1094 : 0 : first = false;
1095 : 0 : S.ReversePtrNumbering[Idx]->printAsOperand(dbgs());
1096 : : }
1097 : 0 : }
1098 : :
1099 : : /* Debugging utility to dump liveness information */
1100 : 0 : JL_USED_FUNC static void dumpLivenessState(Function &F, State &S) {
1101 [ # # ]: 0 : for (auto &BB : F) {
1102 : 0 : dbgs() << "Liveness analysis for BB " << BB.getName();
1103 : 0 : dbgs() << "\n\tDefs: ";
1104 : 0 : dumpBitVectorValues(S, S.BBStates[&BB].Defs);
1105 : 0 : dbgs() << "\n\tPhiOuts: ";
1106 : 0 : dumpBitVectorValues(S, S.BBStates[&BB].PhiOuts);
1107 : 0 : dbgs() << "\n\tUpExposedUses: ";
1108 : 0 : dumpBitVectorValues(S, S.BBStates[&BB].UpExposedUses);
1109 : 0 : dbgs() << "\n\tLiveIn: ";
1110 : 0 : dumpBitVectorValues(S, S.BBStates[&BB].LiveIn);
1111 : 0 : dbgs() << "\n\tLiveOut: ";
1112 : 0 : dumpBitVectorValues(S, S.BBStates[&BB].LiveOut);
1113 : 0 : dbgs() << "\n";
1114 : : }
1115 : 0 : }
1116 : :
1117 : 12949600 : static bool isTBAA(MDNode *TBAA, std::initializer_list<const char*> const strset)
1118 : : {
1119 [ + + ]: 12949600 : if (!TBAA)
1120 : 3547460 : return false;
1121 [ + + ]: 27427900 : while (TBAA->getNumOperands() > 1) {
1122 : 22802100 : TBAA = cast<MDNode>(TBAA->getOperand(1).get());
1123 : 22802100 : auto str = cast<MDString>(TBAA->getOperand(0))->getString();
1124 [ + + ]: 78453600 : for (auto str2 : strset) {
1125 [ + + ]: 60427800 : if (str == str2) {
1126 : 4776280 : return true;
1127 : : }
1128 : : }
1129 : : }
1130 : 4625820 : return false;
1131 : : }
1132 : :
1133 : : // Check if this is a load from an immutable value. The easiest
1134 : : // way to do so is to look at the tbaa and see if it derives from
1135 : : // jtbaa_immut.
1136 : 7227880 : static bool isLoadFromImmut(LoadInst *LI)
1137 : : {
1138 [ + + ]: 7227880 : if (LI->getMetadata(LLVMContext::MD_invariant_load))
1139 : 2962310 : return true;
1140 : 4265570 : MDNode *TBAA = LI->getMetadata(LLVMContext::MD_tbaa);
1141 [ + + ]: 4265570 : if (isTBAA(TBAA, {"jtbaa_immut", "jtbaa_const", "jtbaa_datatype"}))
1142 : 161942 : return true;
1143 : 4103630 : return false;
1144 : : }
1145 : :
1146 : 839257 : static bool isConstGV(GlobalVariable *gv)
1147 : : {
1148 [ + + + + ]: 839257 : return gv->isConstant() || gv->getMetadata("julia.constgv");
1149 : : }
1150 : :
1151 : : typedef llvm::SmallPtrSet<PHINode*, 1> PhiSet;
1152 : :
1153 : : static bool isLoadFromConstGV(LoadInst *LI, bool &task_local, PhiSet *seen = nullptr);
1154 : 144099 : static bool isLoadFromConstGV(Value *v, bool &task_local, PhiSet *seen = nullptr)
1155 : : {
1156 : 144099 : v = v->stripInBoundsOffsets();
1157 [ + + ]: 144099 : if (auto LI = dyn_cast<LoadInst>(v))
1158 : 7257 : return isLoadFromConstGV(LI, task_local, seen);
1159 [ + + ]: 136842 : if (auto gv = dyn_cast<GlobalVariable>(v))
1160 : 4392 : return isConstGV(gv);
1161 : : // null pointer
1162 [ + + ]: 132450 : if (isa<ConstantData>(v))
1163 : 2236 : return true;
1164 : : // literal pointers
1165 [ + + ]: 130214 : if (auto CE = dyn_cast<ConstantExpr>(v))
1166 [ + - + - ]: 188246 : return (CE->getOpcode() == Instruction::IntToPtr &&
1167 : 188246 : isa<ConstantData>(CE->getOperand(0)));
1168 [ + + ]: 36091 : if (auto SL = dyn_cast<SelectInst>(v))
1169 [ + - + - ]: 8790 : return (isLoadFromConstGV(SL->getTrueValue(), task_local, seen) &&
1170 : 8790 : isLoadFromConstGV(SL->getFalseValue(), task_local, seen));
1171 [ + + ]: 31696 : if (auto Phi = dyn_cast<PHINode>(v)) {
1172 : 164 : PhiSet ThisSet(&Phi, &Phi);
1173 [ + - ]: 82 : if (!seen)
1174 : 82 : seen = &ThisSet;
1175 [ # # ]: 0 : else if (seen->count(Phi))
1176 : 0 : return true;
1177 : : else
1178 : 0 : seen->insert(Phi);
1179 : 82 : auto n = Phi->getNumIncomingValues();
1180 [ + + ]: 254 : for (unsigned i = 0; i < n; ++i) {
1181 [ - + ]: 172 : if (!isLoadFromConstGV(Phi->getIncomingValue(i), task_local, seen)) {
1182 : 0 : return false;
1183 : : }
1184 : : }
1185 : 82 : return true;
1186 : : }
1187 [ + + ]: 31614 : if (auto call = dyn_cast<CallInst>(v)) {
1188 : 31358 : auto callee = call->getCalledFunction();
1189 [ + - + + : 31358 : if (callee && callee->getName() == "julia.typeof") {
+ + ]
1190 : 1560 : return true;
1191 : : }
1192 [ + - - + : 29798 : if (callee && callee->getName() == "julia.get_pgcstack") {
- + ]
1193 : 0 : task_local = true;
1194 : 0 : return true;
1195 : : }
1196 : : }
1197 [ + + ]: 30054 : if (isa<Argument>(v)) {
1198 : 256 : task_local = true;
1199 : 256 : return true;
1200 : : }
1201 : 29798 : return false;
1202 : : }
1203 : :
1204 : : // Check if this can be traced through constant loads to a constant global
1205 : : // or otherwise globally rooted value.
1206 : : // Almost all `tbaa_const` loads satisfy this, with the exception of
1207 : : // task-local constants, which are constant as far as the code is concerned but aren't
1208 : : // global constants. For task-local constants, `task_local` will be true when this function
1209 : : // returns.
1210 : : //
1211 : : // The whitelist implemented here and above in `isLoadFromConstGV(Value*)` should
1212 : : // cover all the cases we and LLVM generate.
1213 : 5679610 : static bool isLoadFromConstGV(LoadInst *LI, bool &task_local, PhiSet *seen)
1214 : : {
1215 : : // We only emit single-slot GVs in codegen,
1216 : : // but LLVM global merging can change the pointer operands to GEPs/bitcasts.
1217 : 5679610 : auto load_base = LI->getPointerOperand()->stripInBoundsOffsets();
1218 : 5679610 : auto gv = dyn_cast<GlobalVariable>(load_base);
1219 [ + + ]: 5679610 : if (isTBAA(LI->getMetadata(LLVMContext::MD_tbaa),
1220 : : {"jtbaa_immut", "jtbaa_const", "jtbaa_datatype"})) {
1221 [ + + ]: 1609960 : if (gv)
1222 : 1474830 : return true;
1223 : 135137 : return isLoadFromConstGV(load_base, task_local, seen);
1224 : : }
1225 [ + + ]: 4069650 : if (gv)
1226 : 834865 : return isConstGV(gv);
1227 : 3234780 : return false;
1228 : : }
1229 : :
1230 : 20958 : static uint64_t getLoadValueAlign(LoadInst *LI)
1231 : : {
1232 : 20958 : MDNode *md = LI->getMetadata(LLVMContext::MD_align);
1233 [ - + ]: 20958 : if (!md)
1234 : 0 : return 1;
1235 : 20958 : return mdconst::extract<ConstantInt>(md->getOperand(0))->getLimitedValue();
1236 : : }
1237 : :
1238 : 1968830 : static bool LooksLikeFrameRef(Value *V) {
1239 [ + + ]: 1968830 : if (isSpecialPtr(V->getType()))
1240 : 818673 : return false;
1241 : 1150160 : V = V->stripInBoundsOffsets();
1242 [ - + ]: 1150160 : if (isSpecialPtr(V->getType()))
1243 : 0 : return false;
1244 : 1150160 : return isa<Argument>(V);
1245 : : }
1246 : :
1247 : 441445 : SmallVector<int, 1> LateLowerGCFrame::GetPHIRefinements(PHINode *Phi, State &S)
1248 : : {
1249 : : // The returned vector can violate the domination property of the Refinements map.
1250 : : // However, we can't know for sure if this is valid here since incoming values
1251 : : // that do not dominate the PHI node may be externally rooted (i.e. can be refined to -1).
1252 : : // We only know that after scanning the whole function so we'll record the possibly invalid
1253 : : // edges here and fix them up at the end of `LocalScan`. (See `FixUpRefinements` below).
1254 : 441445 : auto nIncoming = Phi->getNumIncomingValues();
1255 : 441445 : SmallVector<int, 1> RefinedPtr(nIncoming);
1256 [ + + ]: 1525500 : for (unsigned i = 0; i < nIncoming; ++i)
1257 : 1084060 : RefinedPtr[i] = Number(S, Phi->getIncomingValue(i));
1258 : 441445 : return RefinedPtr;
1259 : : }
1260 : :
1261 : 0 : JL_USED_FUNC static void DumpRefinements(State *S)
1262 : : {
1263 [ # # ]: 0 : for (auto &kv: S->Refinements) {
1264 : 0 : int Num = kv.first;
1265 [ # # ]: 0 : if (Num < 0)
1266 : 0 : continue;
1267 : 0 : dbgs() << "Refinements for " << Num << " -- ";
1268 : 0 : auto V = S->ReversePtrNumbering[Num];
1269 : 0 : llvm_dump(V);
1270 [ # # ]: 0 : for (auto refine: kv.second) {
1271 [ # # ]: 0 : if (refine < 0) {
1272 : 0 : dbgs() << " " << (int)refine;
1273 : 0 : continue;
1274 : : }
1275 : 0 : dbgs() << " " << (int)refine << ": ";
1276 : 0 : auto R = S->ReversePtrNumbering[refine];
1277 : 0 : llvm_dump(R);
1278 : : }
1279 : : }
1280 : 0 : }
1281 : :
1282 : 685627 : void LateLowerGCFrame::FixUpRefinements(ArrayRef<int> PHINumbers, State &S)
1283 : : {
1284 : : // Now that we have all the possible refinement information, we can remove the invalid ones.
1285 : :
1286 : : // * First find all values that must be externally rooted.
1287 : : //   Values may either be obviously externally rooted (e.g. arguments; this is indicated by a
1288 : : //   value of -1 or -2 in the refinement map), or may be externally rooted by refinement to other
1289 : : //   values. Thus a value is not externally rooted if it either:
1290 : : //   - Has no refinements (all obviously externally rooted values are annotated
1291 : : //     by -1/-2 in the refinement map), or
1292 : : //   - Recursively reaches a not-externally rooted value through its
1293 : : //     refinements.
1294 : : //
1295 : : // We compute this set by first assuming all values are externally rooted and then iteratively
1296 : : // removing the ones that are not.
1297 : 1371250 : BitVector extern_rooted(S.MaxPtrNumber + 1, true);
1298 : 1371250 : BitVector perm_rooted(S.MaxPtrNumber + 1, true);
1299 : :
1300 : : // * First clear all values that are not derived from anything.
1301 : : // This only needs to be done once.
1302 [ + + ]: 5740630 : for (int i = 0; i <= S.MaxPtrNumber; i++) {
1303 : 5055000 : auto it = S.Refinements.find(i);
1304 [ + + - + : 5055000 : if (it == S.Refinements.end() || it->second.empty()) {
+ + ]
1305 : 3054420 : extern_rooted[i] = false;
1306 : 3054420 : perm_rooted[i] = false;
1307 : : }
1308 : : }
1309 : : // * Then remove values reachable from those values recursively
1310 : : bool changed;
1311 [ + + ]: 1184810 : do {
1312 : 1184810 : changed = false;
1313 [ + + ]: 5394680 : for (auto &kv: S.Refinements) {
1314 : 4209860 : int Num = kv.first;
1315 : : // Already cleared.
1316 [ + + ]: 4209860 : if (!HasBitSet(extern_rooted, Num))
1317 : 573090 : continue;
1318 [ + + ]: 7187670 : for (auto refine: kv.second) {
1319 [ + + ]: 3994440 : if (refine == -2) {
1320 : 1134140 : continue;
1321 : : }
1322 [ + + ]: 2860300 : else if (refine == -1) {
1323 [ + + ]: 2121940 : if (HasBitSet(perm_rooted, Num)) {
1324 : 1028730 : changed = true;
1325 : 1028730 : perm_rooted[Num] = false;
1326 : : }
1327 : 2121940 : continue;
1328 : : }
1329 [ + + ]: 738357 : else if (!HasBitSet(extern_rooted, refine)) {
1330 : 443549 : changed = true;
1331 : 443549 : extern_rooted[Num] = false;
1332 : 443549 : perm_rooted[Num] = false;
1333 : 443549 : break;
1334 : : }
1335 [ + + ]: 294808 : else if (!HasBitSet(perm_rooted, refine)) {
1336 [ + + ]: 197240 : if (HasBitSet(perm_rooted, Num)) {
1337 : 45717 : changed = true;
1338 : 45717 : perm_rooted[Num] = false;
1339 : : }
1340 : : }
1341 : : }
1342 : : }
1343 : : } while (changed);
1344 : : // * Now that the `extern_rooted` and `perm_rooted` sets are accurate,
1345 : : //   normalize all externally rooted values.
1346 [ + + ]: 2686210 : for (auto &kv: S.Refinements) {
1347 : 2000580 : int Num = kv.first;
1348 [ + + ]: 2000580 : if (HasBitSet(perm_rooted, Num)) {
1349 : : // For permanently rooted values, set their refinements simply to `{-2}`
1350 : 496618 : kv.second.resize(1);
1351 : 496618 : kv.second[0] = -2;
1352 : 496618 : continue;
1353 : : }
1354 [ + + ]: 1503960 : else if (HasBitSet(extern_rooted, Num)) {
1355 : : // For externally rooted values, set their refinements simply to `{-1}`
1356 : 1060410 : kv.second.resize(1);
1357 : 1060410 : kv.second[0] = -1;
1358 : 1060410 : continue;
1359 : : }
1360 [ + + ]: 1462880 : for (auto &refine: kv.second) {
1361 : : // For other values,
1362 : : // remove all externally rooted values from their refinements (replace with -1)
1363 : : // No need to handle -2 specially since it won't make a difference.
1364 [ + + ]: 1019330 : if (HasBitSet(extern_rooted, refine)) {
1365 : 36429 : refine = -1;
1366 : : }
1367 : : }
1368 : : }
1369 : : // Scan all phi node refinements and remove all invalid ones.
1370 : : // As a generalization of what we did for externally rooted values above,
1371 : : // we can also relax non-dominating (invalid) refinements to the refinements of those values.
1372 : : // If all of those values dominate the phi node then the phi node can be refined to
1373 : : // those values instead.
1374 : : // While we recursively relax the refinement, we need to keep track of the values we've
1375 : : // visited in order to not scan them again.
1376 : 1371250 : BitVector visited(S.MaxPtrNumber + 1, false);
1377 [ + + ]: 1184940 : for (auto Num: PHINumbers) {
1378 : : // Not sure if `Num` can be `-1`
1379 [ + - + + : 499310 : if (Num < 0 || HasBitSet(extern_rooted, Num))
+ + ]
1380 : 66603 : continue;
1381 : : // N.B.: We reset the bit vector below on every iteration
1382 : 432707 : visited[Num] = true;
1383 : 432707 : auto Phi = cast<PHINode>(S.ReversePtrNumbering[Num]);
1384 : 432707 : auto &RefinedPtr = S.Refinements[Num];
1385 : 432707 : unsigned j = 0; // new length
1386 [ + + ]: 815853 : for (unsigned i = 0; i < RefinedPtr.size(); i++) {
1387 : 714044 : auto refine = RefinedPtr[i];
1388 [ + + + + : 714044 : if (refine < 0 || visited[refine])
+ + ]
1389 : 383146 : continue;
1390 : 605364 : visited[refine] = true;
1391 [ + + ]: 605364 : if (i != j)
1392 : 58785 : RefinedPtr[j] = refine;
1393 : 605364 : j++;
1394 [ + - ]: 605364 : if (auto inst = dyn_cast<Instruction>(S.ReversePtrNumbering[refine])) {
1395 [ + + ]: 605364 : if (!S.DT)
1396 : 87941 : S.DT = &GetDT();
1397 [ + + ]: 605364 : if (S.DT->dominates(inst, Phi))
1398 : 274466 : continue;
1399 : : // Decrement `j` so we'll overwrite/ignore it.
1400 : 439344 : j--;
1401 : : // Non-dominating refinement
1402 : 439344 : auto it = S.Refinements.find(refine);
1403 [ + + + + : 439344 : if (it != S.Refinements.end() && !it->second.empty()) {
+ + ]
1404 : : // Found a replacement, replace current element.
1405 : 108446 : auto &NewRefinedPtr = it->second;
1406 : 108446 : unsigned n = NewRefinedPtr.size();
1407 : : // First fill in the gap between `i` and `j`
1408 : 108446 : unsigned k = 0;
1409 [ + + + + ]: 224591 : for (; k < n && i >= j + k; k++)
1410 : 116145 : RefinedPtr[i - k] = NewRefinedPtr[k];
1411 : 108446 : i = i - k;
1412 [ + + ]: 108446 : if (k < n)
1413 : 69957 : RefinedPtr.append(it->second.begin() + k, it->second.end());
1414 : 108446 : continue;
1415 : : }
1416 : : // Invalid - Remove All refinements
1417 : 330898 : RefinedPtr.resize(0);
1418 : 330898 : break;
1419 : : }
1420 : : }
1421 [ + + ]: 432707 : if (!RefinedPtr.empty()) {
1422 : : // `j == 0` here means that everything is externally rooted.
1423 : : // This should have been handled by the first loop above.
1424 [ + - + - ]: 43944 : assert(j != 0 && j <= RefinedPtr.size());
1425 : 43944 : RefinedPtr.resize(j);
1426 : : }
1427 : 432707 : visited.reset();
1428 : : }
1429 : 685627 : }
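
The fixpoint at the top of FixUpRefinements starts by assuming every numbered pointer is externally rooted and then clears entries until nothing changes. The following stand-alone sketch mirrors just that part, for illustration only and not part of the pass: it uses std::map/std::vector over hypothetical pointer numbers instead of the pass's State and BitVectors, handles only the external-rootedness half (perm_rooted is ignored), and assumes <map> and <vector> are available.

    // Returns, for every pointer number 0..maxNum, whether it can be considered
    // externally rooted given the refinement map. Negative entries (-1/-2) mean
    // "rooted by definition", just like in the real Refinements map.
    static std::vector<bool> toyExternRooted(int maxNum,
                                             const std::map<int, std::vector<int>> &refinements)
    {
        std::vector<bool> rooted(maxNum + 1, true);
        // A value with no refinements is not known to be externally rooted.
        for (int i = 0; i <= maxNum; i++) {
            auto it = refinements.find(i);
            if (it == refinements.end() || it->second.empty())
                rooted[i] = false;
        }
        // Clear values that (transitively) refine to a value that is not rooted.
        bool changed;
        do {
            changed = false;
            for (const auto &kv : refinements) {
                if (!rooted[kv.first])
                    continue;
                for (int refine : kv.second) {
                    if (refine >= 0 && !rooted[refine]) {
                        rooted[kv.first] = false;
                        changed = true;
                        break;
                    }
                }
            }
        } while (changed);
        return rooted;
    }

For example, with refinements {0: {-1}, 1: {0}, 2: {3}, 3: {}} the result is {true, true, false, false}: number 3 has no refinements, so 2, which only refines to 3, cannot be treated as externally rooted either.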
1430 : :
1431 : 685627 : State LateLowerGCFrame::LocalScan(Function &F) {
1432 : 685627 : State S(F);
1433 : 1371250 : SmallVector<int, 8> PHINumbers;
1434 [ + + ]: 7013350 : for (BasicBlock &BB : F) {
1435 : 6327720 : BBState &BBS = S.BBStates[&BB];
1436 [ + + ]: 61475800 : for (auto it = BB.rbegin(); it != BB.rend(); ++it) {
1437 : 55148000 : Instruction &I = *it;
1438 [ + + ]: 55148000 : if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1439 [ + + ]: 7097910 : if (isa<IntrinsicInst>(CI)) {
1440 : : // Most intrinsics are not gc uses/defs; however, some have
1441 : : // memory operands and could thus be GC uses. To be conservative,
1442 : : // we only skip processing for those that we know we emit often
1443 : : // and cannot possibly be GC uses.
1444 : 1768860 : IntrinsicInst *II = cast<IntrinsicInst>(CI);
1445 [ + + ]: 2571960 : if (isa<DbgInfoIntrinsic>(CI) ||
1446 [ + + + + : 2571960 : II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ + ]
1447 : 720540 : II->getIntrinsicID() == Intrinsic::lifetime_end) {
1448 : 2884300 : continue;
1449 : : }
1450 [ + + + + : 1141880 : if (II->getIntrinsicID() == Intrinsic::masked_load ||
+ + ]
1451 : 570576 : II->getIntrinsicID() == Intrinsic::masked_gather) {
1452 [ + - ]: 1119 : if (auto VTy = dyn_cast<VectorType>(II->getType())) {
1453 [ + + ]: 1119 : if (auto PtrT = dyn_cast<PointerType>(VTy->getElementType())) {
1454 [ + - ]: 12 : if (isSpecialPtr(PtrT)) {
1455 : : // LLVM sometimes tries to materialize these operations with undefined pointers in our non-integral address space.
1456 : : // Hopefully LLVM didn't already propagate that information and poison our users. Set those to NULL now.
1457 : 12 : Value *passthru = II->getArgOperand(3);
1458 [ + - ]: 12 : if (isa<UndefValue>(passthru)) {
1459 : 12 : II->setArgOperand(3, Constant::getNullValue(passthru->getType()));
1460 : : }
1461 [ - + ]: 12 : if (PtrT->getAddressSpace() == AddressSpace::Loaded) {
1462 : : // These are not real defs
1463 : 0 : continue;
1464 : : }
1465 : : }
1466 : : }
1467 : : }
1468 : : }
1469 : : }
1470 : 5900350 : auto callee = CI->getCalledFunction();
1471 [ + + + + ]: 5900350 : if (callee && callee == typeof_func) {
1472 : 190173 : MaybeNoteDef(S, BBS, CI, BBS.Safepoints, SmallVector<int, 1>{-2});
1473 : : }
1474 : : else {
1475 : 5710180 : MaybeNoteDef(S, BBS, CI, BBS.Safepoints);
1476 : : }
1477 [ + + ]: 5900350 : if (CI->hasStructRetAttr()) {
1478 : 179421 : Type *ElT = getAttributeAtIndex(CI->getAttributes(), 1, Attribute::StructRet).getValueAsType();
1479 [ - + ]: 179421 : assert(cast<PointerType>(CI->getArgOperand(0)->getType())->isOpaqueOrPointeeTypeMatches(getAttributeAtIndex(CI->getAttributes(), 1, Attribute::StructRet).getValueAsType()));
1480 : 179421 : auto tracked = CountTrackedPointers(ElT);
1481 [ + + ]: 179421 : if (tracked.count) {
1482 : 129658 : AllocaInst *SRet = dyn_cast<AllocaInst>((CI->arg_begin()[0])->stripInBoundsOffsets());
1483 [ - + ]: 129658 : assert(SRet);
1484 : : {
1485 [ + - - + : 129658 : if (!(SRet->isStaticAlloca() && isa<PointerType>(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked)) {
- - + - ]
1486 [ - + ]: 129658 : assert(!tracked.derived);
1487 [ + + ]: 129658 : if (tracked.all) {
1488 : 48002 : S.ArrayAllocas[SRet] = tracked.count * cast<ConstantInt>(SRet->getArraySize())->getZExtValue();
1489 : : }
1490 : : else {
1491 : 81656 : Value *arg1 = (CI->arg_begin()[1])->stripInBoundsOffsets();
1492 : 81656 : AllocaInst *SRet_gc = nullptr;
1493 [ - + ]: 81656 : if (PHINode *Phi = dyn_cast<PHINode>(arg1)) {
1494 [ # # ]: 0 : for (Value *V : Phi->incoming_values()) {
1495 [ # # ]: 0 : if (AllocaInst *Alloca = dyn_cast<AllocaInst>(V->stripInBoundsOffsets())) {
1496 [ # # ]: 0 : if (SRet_gc == nullptr) {
1497 : 0 : SRet_gc = Alloca;
1498 [ # # ]: 0 : } else if (SRet_gc == Alloca) {
1499 : 0 : continue;
1500 : : } else {
1501 : 0 : llvm_dump(Alloca);
1502 : 0 : llvm_dump(SRet_gc);
1503 : 0 : assert(false && "Allocas in Phi node should match");
1504 : : }
1505 : : } else {
1506 : 0 : llvm_dump(V->stripInBoundsOffsets());
1507 : 0 : assert(false && "Expected alloca");
1508 : : }
1509 : : }
1510 : : } else {
1511 : 81656 : SRet_gc = dyn_cast<AllocaInst>(arg1);
1512 : : }
1513 [ - + ]: 81656 : if (!SRet_gc) {
1514 : 0 : llvm_dump(CI);
1515 : 0 : llvm_dump(arg1);
1516 : 0 : assert(false && "Expected alloca");
1517 : : }
1518 : 81656 : Type *ElT = SRet_gc->getAllocatedType();
1519 [ + - - + : 81656 : if (!(SRet_gc->isStaticAlloca() && isa<PointerType>(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked)) {
- - + - ]
1520 : 81656 : S.ArrayAllocas[SRet_gc] = tracked.count * cast<ConstantInt>(SRet_gc->getArraySize())->getZExtValue();
1521 : : }
1522 : : }
1523 : : }
1524 : : }
1525 : : }
1526 : : }
1527 : 5900350 : NoteOperandUses(S, BBS, I);
1528 [ + + ]: 5900350 : if (CI->canReturnTwice()) {
1529 : 34815 : S.ReturnsTwice.push_back(CI);
1530 : : }
1531 [ + + ]: 5900350 : if (callee) {
1532 [ + + ]: 5609420 : if (callee == gc_preserve_begin_func) {
1533 : 54426 : std::vector<int> args;
1534 [ + + ]: 56211 : for (Use &U : CI->args()) {
1535 : 28998 : Value *V = U;
1536 [ + + ]: 28998 : if (isa<Constant>(V))
1537 : 34 : continue;
1538 [ + + ]: 28964 : if (isa<PointerType>(V->getType())) {
1539 [ + - ]: 27770 : if (isSpecialPtr(V->getType())) {
1540 : 27770 : int Num = Number(S, V);
1541 [ + + ]: 27770 : if (Num >= 0)
1542 : 19636 : args.push_back(Num);
1543 : : }
1544 : : } else {
1545 : 2388 : std::vector<int> Nums = NumberAll(S, V);
1546 [ + + ]: 2392 : for (int Num : Nums) {
1547 [ + + ]: 1198 : if (Num < 0)
1548 : 79 : continue;
1549 : 1119 : args.push_back(Num);
1550 : : }
1551 : : }
1552 : : }
1553 : 27213 : S.GCPreserves[CI] = args;
1554 : 27213 : continue;
1555 : : }
1556 : : // Known functions emitted in codegen that are not safepoints
1557 [ + - ]: 5426700 : if (callee == pointer_from_objref_func || callee == gc_preserve_begin_func ||
1558 [ + + + + ]: 5426700 : callee == gc_preserve_end_func || callee == typeof_func ||
1559 [ + + + + : 9693000 : callee == pgcstack_getter || callee->getName() == XSTR(jl_egal__unboxed) ||
+ + ]
1560 [ + + ]: 9002060 : callee->getName() == XSTR(jl_lock_value) || callee->getName() == XSTR(jl_unlock_value) ||
1561 [ + + + + : 15431100 : callee == write_barrier_func || callee == write_barrier_binding_func ||
+ + - + ]
1562 [ + + ]: 10004400 : callee->getName() == "memcmp") {
1563 : 1160010 : continue;
1564 : : }
1565 [ + + ]: 8642310 : if (callee->hasFnAttribute(Attribute::ReadNone) ||
1566 [ + + + + : 8642310 : callee->hasFnAttribute(Attribute::ReadOnly) ||
+ + ]
1567 : 4218990 : callee->hasFnAttribute(Attribute::ArgMemOnly)) {
1568 : 482892 : continue;
1569 : : }
1570 [ - + ]: 3939300 : if (MemTransferInst *MI = dyn_cast<MemTransferInst>(CI)) {
1571 : 0 : MaybeTrackDst(S, MI);
1572 : : }
1573 : : }
1574 [ + - + - ]: 12657500 : if (isa<IntrinsicInst>(CI) || CI->hasFnAttr(Attribute::ArgMemOnly) ||
1575 [ + + - + : 12657500 : CI->hasFnAttr(Attribute::ReadNone) || CI->hasFnAttr(Attribute::ReadOnly)) {
+ + ]
1576 : : // Intrinsics are never safepoints.
1577 : 16622 : continue;
1578 : : }
1579 : 8427220 : std::vector<int> CalleeRoots;
1580 [ + + ]: 14637400 : for (Use &U : CI->args()) {
1581 : : // Find all callee rooted arguments.
1582 : : // Record them instead of simply removing them from the live values here,
1583 : : // since they can be useful during refinement
1584 : : // (e.g. to remove roots of objects that are refined to these)
1585 : 10423800 : Value *V = U;
1586 [ + + + + : 15707500 : if (isa<Constant>(V) || !isa<PointerType>(V->getType()) ||
+ + + + ]
1587 : 5283680 : getValueAddrSpace(V) != AddressSpace::CalleeRooted)
1588 : 9871030 : continue;
1589 : 715935 : V = V->stripPointerCasts();
1590 [ + + ]: 715935 : if (!isTrackedValue(V))
1591 : 41648 : continue;
1592 : 674287 : auto Num = Number(S, V);
1593 [ + + ]: 674287 : if (Num < 0)
1594 : 121529 : continue;
1595 : 552758 : CalleeRoots.push_back(Num);
1596 : : }
1597 : 4213610 : int SafepointNumber = NoteSafepoint(S, BBS, CI, std::move(CalleeRoots));
1598 : 4213610 : BBS.HasSafepoint = true;
1599 : 4213610 : BBS.TopmostSafepoint = SafepointNumber;
1600 : 4213610 : BBS.Safepoints.push_back(SafepointNumber);
1601 [ + + ]: 48050100 : } else if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
1602 : : // If this is a load from an immutable, we know that
1603 : : // this object will always be rooted as long as the
1604 : : // object we're loading from is, so we can refine uses
1605 : : // of this object to uses of the object we're loading
1606 : : // from.
1607 : 14455800 : SmallVector<int, 1> RefinedPtr{};
1608 : 7227880 : Type *Ty = LI->getType()->getScalarType();
1609 : 7227880 : bool task_local = false;
1610 [ + + + + : 7227880 : if (isLoadFromImmut(LI) && isSpecialPtr(LI->getPointerOperand()->getType())) {
+ + ]
1611 : 1051640 : RefinedPtr.push_back(Number(S, LI->getPointerOperand()));
1612 [ + + ]: 9462820 : } else if (LI->getType()->isPointerTy() &&
1613 [ + + + + : 9462820 : isSpecialPtr(Ty) &&
+ + ]
1614 : 1968830 : LooksLikeFrameRef(LI->getPointerOperand())) {
1615 : : // Loads from a jlcall argument array
1616 : 748272 : RefinedPtr.push_back(-1);
1617 : : }
1618 [ + + ]: 5427960 : else if (isLoadFromConstGV(LI, task_local)) {
1619 : : // If this is a const load from a global,
1620 : : // we know that the object is a constant as well and doesn't need rooting.
1621 : : // If this is a task local constant, we don't need to root it within the
1622 : : // task but we do need to issue write barriers for when the current task dies.
1623 [ + + ]: 1331150 : RefinedPtr.push_back(task_local ? -1 : -2);
1624 : : }
1625 [ + + + + : 7227880 : if (!Ty->isPointerTy() || Ty->getPointerAddressSpace() != AddressSpace::Loaded) {
+ + ]
1626 : 6702720 : MaybeNoteDef(S, BBS, LI, BBS.Safepoints, std::move(RefinedPtr));
1627 : : }
1628 : 7227880 : NoteOperandUses(S, BBS, I);
1629 [ + + ]: 40822200 : } else if (auto *LI = dyn_cast<AtomicCmpXchgInst>(&I)) {
1630 : 6058 : Type *Ty = LI->getNewValOperand()->getType()->getScalarType();
1631 [ + + + - : 6058 : if (!Ty->isPointerTy() || Ty->getPointerAddressSpace() != AddressSpace::Loaded) {
+ - ]
1632 : 6058 : MaybeNoteDef(S, BBS, LI, BBS.Safepoints);
1633 : : }
1634 : 6058 : NoteOperandUses(S, BBS, I);
1635 : : // TODO: do we need MaybeTrackStore(S, LI);
1636 [ + + ]: 40816200 : } else if (auto *LI = dyn_cast<AtomicRMWInst>(&I)) {
1637 : 3149 : Type *Ty = LI->getType()->getScalarType();
1638 [ - + - - : 3149 : if (!Ty->isPointerTy() || Ty->getPointerAddressSpace() != AddressSpace::Loaded) {
+ - ]
1639 : 3149 : MaybeNoteDef(S, BBS, LI, BBS.Safepoints);
1640 : : }
1641 : 3149 : NoteOperandUses(S, BBS, I);
1642 : : // TODO: do we need MaybeTrackStore(S, LI);
1643 [ + + ]: 40813000 : } else if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
1644 : 779273 : auto tracked = CountTrackedPointers(SI->getType());
1645 [ + + + + ]: 779273 : if (tracked.count && !tracked.derived) {
1646 : : // record the select definition of these values
1647 : 139128 : SmallVector<int, 2> RefinedPtr;
1648 [ + + ]: 69564 : if (isa<PointerType>(SI->getType())) {
1649 : : // TODO: Refinements for vector select
1650 : 65799 : RefinedPtr = {
1651 : 65799 : Number(S, SI->getTrueValue()),
1652 : 65799 : Number(S, SI->getFalseValue())
1653 : 65799 : };
1654 : : }
1655 : 69564 : MaybeNoteDef(S, BBS, SI, BBS.Safepoints, std::move(RefinedPtr));
1656 : 139128 : NoteOperandUses(S, BBS, I);
1657 [ + + ]: 709709 : } else if (tracked.count) {
1658 : : // We need to insert extra selects for the GC roots
1659 : 18525 : LiftSelect(S, SI);
1660 : : }
1661 [ + + ]: 40033800 : } else if (PHINode *Phi = dyn_cast<PHINode>(&I)) {
1662 : 2032620 : auto tracked = CountTrackedPointers(Phi->getType());
1663 [ + + + + ]: 2032620 : if (tracked.count && !tracked.derived) {
1664 : : // record the phi definition of these values
1665 : 949530 : SmallVector<int, 1> PHIRefinements;
1666 [ + + ]: 474765 : if (isa<PointerType>(Phi->getType()))
1667 : : // TODO: Vector refinements
1668 : 441445 : PHIRefinements = GetPHIRefinements(Phi, S);
1669 : 474765 : MaybeNoteDef(S, BBS, Phi, BBS.Safepoints, std::move(PHIRefinements));
1670 [ + + ]: 474765 : if (isa<PointerType>(Phi->getType())) {
1671 : 441445 : PHINumbers.push_back(Number(S, Phi));
1672 : : } else {
1673 : 66640 : std::vector<int> Nums = NumberAll(S, Phi);
1674 [ + + ]: 91185 : for (int Num : Nums)
1675 : 57865 : PHINumbers.push_back(Num);
1676 : : }
1677 : 474765 : unsigned nIncoming = Phi->getNumIncomingValues();
1678 [ + + ]: 1637400 : for (unsigned i = 0; i < nIncoming; ++i) {
1679 : 1162640 : BBState &IncomingBBS = S.BBStates[Phi->getIncomingBlock(i)];
1680 : 1162640 : NoteUse(S, IncomingBBS, Phi->getIncomingValue(i), IncomingBBS.PhiOuts);
1681 : 474765 : }
1682 [ + + ]: 1557850 : } else if (tracked.count) {
1683 : : // We need to insert extra phis for the GC roots
1684 : 41511 : LiftPhi(S, Phi);
1685 : : }
1686 [ + + ]: 38001100 : } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
1687 : 4512080 : NoteOperandUses(S, BBS, I);
1688 : 4512080 : MaybeTrackStore(S, SI);
1689 [ + + ]: 33489100 : } else if (isa<ReturnInst>(&I)) {
1690 : 630117 : NoteOperandUses(S, BBS, I);
1691 [ + + ]: 32858900 : } else if (auto *ASCI = dyn_cast<AddrSpaceCastInst>(&I)) {
1692 [ + + ]: 3577590 : if (isTrackedValue(ASCI)) {
1693 : 486590 : SmallVector<int, 1> RefinedPtr{};
1694 : 243295 : bool task_local = false;
1695 : 243295 : auto origin = ASCI->getPointerOperand()->stripPointerCasts();
1696 [ + + ]: 243295 : if (auto LI = dyn_cast<LoadInst>(origin)) {
1697 [ + - ]: 223433 : if (isLoadFromConstGV(LI, task_local)) {
1698 [ - + ]: 223433 : RefinedPtr.push_back(task_local ? -1 : -2);
1699 : : }
1700 : : }
1701 : 243295 : MaybeNoteDef(S, BBS, ASCI, BBS.Safepoints, std::move(RefinedPtr));
1702 : : }
1703 [ + + ]: 29281400 : } else if (auto *AI = dyn_cast<AllocaInst>(&I)) {
1704 : 1423070 : Type *ElT = AI->getAllocatedType();
1705 [ + + + + : 1423070 : if (AI->isStaticAlloca() && isa<PointerType>(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked) {
+ + + + ]
1706 : 149918 : S.Allocas.push_back(AI);
1707 : : }
1708 : : }
1709 : : }
1710 : : // Pre-seed the dataflow variables.
1711 : 6327720 : BBS.LiveIn = BBS.UpExposedUses;
1712 : 6327720 : BBS.Done = true;
1713 : : }
1714 : 685627 : FixUpRefinements(PHINumbers, S);
1715 : 685627 : return S;
1716 : : }
1717 : :
1718 : 85495 : static Value *ExtractScalar(Value *V, Type *VTy, bool isptr, ArrayRef<unsigned> Idxs, IRBuilder<> &irbuilder) {
1719 : 85495 : Type *T_int32 = Type::getInt32Ty(V->getContext());
1720 [ + + ]: 85495 : if (isptr) {
1721 : 65046 : std::vector<Value*> IdxList{Idxs.size() + 1};
1722 : 32523 : IdxList[0] = ConstantInt::get(T_int32, 0);
1723 [ + + ]: 66889 : for (unsigned j = 0; j < Idxs.size(); ++j) {
1724 : 34366 : IdxList[j + 1] = ConstantInt::get(T_int32, Idxs[j]);
1725 : : }
1726 : 32523 : Value *GEP = irbuilder.CreateInBoundsGEP(VTy, V, IdxList);
1727 : 32523 : Type *T = GetElementPtrInst::getIndexedType(VTy, IdxList);
1728 [ - + ]: 32523 : assert(T->isPointerTy());
1729 : 32523 : V = irbuilder.CreateAlignedLoad(T, GEP, Align(sizeof(void*)));
1730 : : // since we're doing stack operations, it should be safe do this non-atomically
1731 : 32523 : cast<LoadInst>(V)->setOrdering(AtomicOrdering::NotAtomic);
1732 : : }
1733 [ - + ]: 52972 : else if (isa<PointerType>(V->getType())) {
1734 [ # # ]: 0 : assert(Idxs.empty());
1735 : : }
1736 [ + - ]: 52972 : else if (!Idxs.empty()) {
1737 : 52972 : auto IdxsNotVec = Idxs.slice(0, Idxs.size() - 1);
1738 : 52972 : Type *FinalT = ExtractValueInst::getIndexedType(V->getType(), IdxsNotVec);
1739 : 52972 : bool IsVector = isa<VectorType>(FinalT);
1740 [ + - ]: 52972 : if (Idxs.size() > IsVector)
1741 [ - + ]: 52972 : V = irbuilder.Insert(ExtractValueInst::Create(V, IsVector ? IdxsNotVec : Idxs));
1742 [ - + ]: 52972 : if (IsVector)
1743 : 0 : V = irbuilder.Insert(ExtractElementInst::Create(V,
1744 : 0 : ConstantInt::get(Type::getInt32Ty(V->getContext()), Idxs.back())));
1745 : : }
1746 : 85495 : return V;
1747 : : }
1748 : :
1749 : 1122 : static unsigned getFieldOffset(const DataLayout &DL, Type *STy, ArrayRef<unsigned> Idxs)
1750 : : {
1751 : 1122 : SmallVector<Value*,4> IdxList{Idxs.size() + 1};
1752 : 1122 : Type *T_int32 = Type::getInt32Ty(STy->getContext());
1753 : 1122 : IdxList[0] = ConstantInt::get(T_int32, 0);
1754 [ + + ]: 2285 : for (unsigned j = 0; j < Idxs.size(); ++j)
1755 : 1163 : IdxList[j + 1] = ConstantInt::get(T_int32, Idxs[j]);
1756 : 1122 : auto offset = DL.getIndexedOffsetInType(STy, IdxList);
1757 [ - + ]: 1122 : assert(offset >= 0);
1758 : 1122 : return (unsigned)offset;
1759 : : }
1760 : :
1761 : 48330 : std::vector<Value*> ExtractTrackedValues(Value *Src, Type *STy, bool isptr, IRBuilder<> &irbuilder, ArrayRef<unsigned> perm_offsets) {
1762 : 96660 : auto Tracked = TrackCompositeType(STy);
1763 : 48330 : std::vector<Value*> Ptrs;
1764 : 48330 : unsigned perm_idx = 0;
1765 : 86551 : auto ignore_field = [&] (ArrayRef<unsigned> Idxs) {
1766 [ + + ]: 86551 : if (perm_idx >= perm_offsets.size())
1767 : 85429 : return false;
1768 : : // Assume the indices returned from `TrackCompositeType` are ordered and do a
1769 : : // single pass over `perm_offsets`.
1770 [ - + ]: 1122 : assert(!isptr);
1771 : 1122 : auto offset = getFieldOffset(irbuilder.GetInsertBlock()->getModule()->getDataLayout(),
1772 : 1122 : STy, Idxs);
1773 : 0 : do {
1774 : 1122 : auto perm_offset = perm_offsets[perm_idx];
1775 [ + + ]: 1122 : if (perm_offset > offset)
1776 : 66 : return false;
1777 : 1056 : perm_idx++;
1778 [ + - ]: 1056 : if (perm_offset == offset) {
1779 : 1056 : return true;
1780 : : }
1781 [ # # ]: 0 : } while (perm_idx < perm_offsets.size());
1782 : 0 : return false;
1783 : 48330 : };
1784 [ + + ]: 134881 : for (unsigned i = 0; i < Tracked.size(); ++i) {
1785 : 86551 : auto Idxs = makeArrayRef(Tracked[i]);
1786 [ + + ]: 86551 : if (ignore_field(Idxs))
1787 : 1056 : continue;
1788 : 85495 : Value *Elem = ExtractScalar(Src, STy, isptr, Idxs, irbuilder);
1789 : 85495 : Ptrs.push_back(Elem);
1790 : : }
1791 : 48330 : return Ptrs;
1792 : : }
1793 : :
1794 : 33383 : unsigned TrackWithShadow(Value *Src, Type *STy, bool isptr, Value *Dst, Type *DTy, IRBuilder<> &irbuilder) {
1795 : 33383 : auto Ptrs = ExtractTrackedValues(Src, STy, isptr, irbuilder);
1796 [ + + ]: 99542 : for (unsigned i = 0; i < Ptrs.size(); ++i) {
1797 : 66159 : Value *Elem = Ptrs[i];// Dst has type `[n x {}*]*`
1798 : 66159 : Value *Slot = irbuilder.CreateConstInBoundsGEP2_32(DTy, Dst, 0, i);
1799 [ - + ]: 66159 : assert(cast<PointerType>(Dst->getType())->isOpaqueOrPointeeTypeMatches(DTy));
1800 : 66159 : StoreInst *shadowStore = irbuilder.CreateAlignedStore(Elem, Slot, Align(sizeof(void*)));
1801 : 66159 : shadowStore->setOrdering(AtomicOrdering::NotAtomic);
1802 : : // TODO: shadowStore->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
1803 : : }
1804 : 33383 : return Ptrs.size();
1805 : : }
1806 : :
1807 : :
1808 : : // turn a memcpy into a set of loads
1809 : 0 : void LateLowerGCFrame::MaybeTrackDst(State &S, MemTransferInst *MI) {
1810 : : //Value *Dst = MI->getRawDest()->stripInBoundsOffsets();
1811 : : //if (AllocaInst *AI = dyn_cast<AllocaInst>(Dst)) {
1812 : : // Type *STy = AI->getAllocatedType();
1813 : : // if (!AI->isStaticAlloca() || (isa<PointerType>(STy) && STy->getPointerAddressSpace() == AddressSpace::Tracked) || S.ArrayAllocas.count(AI))
1814 : : // return; // already numbered this
1815 : : // auto tracked = CountTrackedPointers(STy);
1816 : : // unsigned nroots = tracked.count * cast<ConstantInt>(AI->getArraySize())->getZExtValue();
1817 : : // if (nroots) {
1818 : : // assert(!tracked.derived);
1819 : : // if (!tracked.all) {
1820 : : // // materialize shadow LoadInst and StoreInst ops to make a copy of just the tracked values inside
1821 : : // //assert(MI->getLength() == DL.getTypeAllocSize(AI->getAllocatedType()) && !AI->isArrayAllocation()); // XXX: handle partial copy
1822 : : // Value *Src = MI->getSource();
1823 : : // Src = new BitCastInst(Src, STy->getPointerTo(MI->getSourceAddressSpace()), "", MI);
1824 : : // auto &Shadow = S.ShadowAllocas[AI];
1825 : : // if (!Shadow)
1826 : : // Shadow = new AllocaInst(ArrayType::get(T_prjlvalue, nroots), 0, "", MI);
1827 : : // AI = Shadow;
1828 : : // unsigned count = TrackWithShadow(Src, STy, true, AI, IRBuilder<>(MI));
1829 : : // assert(count == tracked.count); (void)count;
1830 : : // }
1831 : : // S.ArrayAllocas[AI] = nroots;
1832 : : // }
1833 : : //}
1834 : : //// TODO: else???
1835 : 0 : }
1836 : :
1837 : 4512080 : void LateLowerGCFrame::MaybeTrackStore(State &S, StoreInst *I) {
1838 : 4512080 : Value *PtrBase = I->getPointerOperand()->stripInBoundsOffsets();
1839 : 4512080 : auto tracked = CountTrackedPointers(I->getValueOperand()->getType());
1840 [ + + ]: 4512080 : if (!tracked.count)
1841 : 4415200 : return; // nothing to track is being stored
1842 [ + + ]: 1021970 : if (AllocaInst *AI = dyn_cast<AllocaInst>(PtrBase)) {
1843 : 501142 : Type *STy = AI->getAllocatedType();
1844 [ + + + + : 501142 : if (!AI->isStaticAlloca() || (isa<PointerType>(STy) && STy->getPointerAddressSpace() == AddressSpace::Tracked) || S.ArrayAllocas.count(AI))
- + + + +
+ ]
1845 : 404267 : return; // already numbered this
1846 : 163406 : auto tracked = CountTrackedPointers(STy);
1847 [ + + ]: 163406 : if (tracked.count) {
1848 [ - + ]: 163405 : assert(!tracked.derived);
1849 [ + + ]: 163405 : if (tracked.all) {
1850 : : // track the Alloca directly
1851 : 66531 : S.ArrayAllocas[AI] = tracked.count * cast<ConstantInt>(AI->getArraySize())->getZExtValue();
1852 : 66531 : return;
1853 : : }
1854 : : }
1855 : : }
1856 : : else {
1857 : 520829 : return; // assume it is rooted--TODO: should we be more conservative?
1858 : : }
1859 : : // track the Store with a Shadow
1860 : : //auto &Shadow = S.ShadowAllocas[AI];
1861 : : //if (!Shadow)
1862 : : // Shadow = new AllocaInst(ArrayType::get(T_prjlvalue, tracked.count), 0, "", MI);
1863 : : //AI = Shadow;
1864 : : //Value *Src = I->getValueOperand();
1865 : : //unsigned count = TrackWithShadow(Src, Src->getType(), false, AI, MI, TODO which slots are we actually clobbering?);
1866 : : //assert(count == tracked.count); (void)count;
1867 : 96875 : S.TrackedStores.push_back(std::make_pair(I, tracked.count));
1868 : : }
1869 : :
1870 : : /*
1871 : : * DataFlow equations:
1872 : : * LiveIn[BB] = UpExposedUses[BB] ∪ (LiveOut[BB] - Defs[BB])
1873 : : * LiveOut[BB] = PhiUses[BB] ∪ ∪_{Succ} LiveIn[Succ]
1874 : : *
1875 : : * We'll perform textbook iterative dataflow to compute this. There are better
1876 : : * algorithms. If this starts becoming a problem, we should use one of them.
1877 : : */
1878 : 685627 : void LateLowerGCFrame::ComputeLiveness(State &S) {
1879 : 685627 : bool Converged = false;
1880 : : /* Liveness is a reverse problem, so RPOT is a good way to
1881 : : * perform this iteration.
1882 : : */
1883 : 1371250 : ReversePostOrderTraversal<Function *> RPOT(S.F);
1884 : 1371250 : BitVector NewLive;
1885 [ + + ]: 3010250 : while (!Converged) {
1886 : 2324630 : bool AnyChanged = false;
1887 [ + + ]: 138951000 : for (BasicBlock *BB : RPOT) {
1888 : : // This could all be done more efficiently, by only updating what
1889 : : // changed - Let's get it working first though.
1890 : 136626000 : BBState &BBS = S.BBStates[BB];
1891 : 136626000 : NewLive = BBS.PhiOuts;
1892 [ + + ]: 314092000 : for (BasicBlock *Succ : successors(BB)) {
1893 : 177466000 : NewLive |= S.BBStates[Succ].LiveIn;
1894 : : }
1895 [ + + ]: 136626000 : if (NewLive != BBS.LiveOut) {
1896 : 20768900 : AnyChanged = true;
1897 : 20768900 : BBS.LiveOut = NewLive;
1898 : 20768900 : MaybeResize(BBS, BBS.LiveOut.size() - 1);
1899 : : }
1900 : 136626000 : NewLive.reset(BBS.Defs);
1901 : 136626000 : NewLive |= BBS.UpExposedUses;
1902 [ + + ]: 136626000 : if (NewLive != BBS.LiveIn) {
1903 : 18163100 : AnyChanged = true;
1904 : 18163100 : std::swap(BBS.LiveIn, NewLive);
1905 : : }
1906 : : }
1907 : 2324630 : Converged = !AnyChanged;
1908 : : }
1909 : 685627 : ComputeLiveSets(S);
1910 : 685627 : }
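
One update of the two dataflow equations, written out over plain std::set instead of BitVectors, may make the loop above easier to follow. This is an illustrative sketch only: `ToyBB` and `toyLivenessStep` are hypothetical, `PhiUses` corresponds to `BBS.PhiOuts` in the code, and <set> and <vector> are assumed to be available.

    struct ToyBB {
        std::set<int> Defs, UpExposedUses, PhiUses, LiveIn, LiveOut;
    };

    // One update of a single block; iterating this over all blocks (in RPO) until
    // no block changes is what ComputeLiveness does with BitVector operations.
    static bool toyLivenessStep(ToyBB &bb, const std::vector<const ToyBB *> &succs)
    {
        // LiveOut[BB] = PhiUses[BB] ∪ ∪_{Succ} LiveIn[Succ]
        std::set<int> out = bb.PhiUses;
        for (const ToyBB *succ : succs)
            out.insert(succ->LiveIn.begin(), succ->LiveIn.end());
        // LiveIn[BB] = UpExposedUses[BB] ∪ (LiveOut[BB] - Defs[BB])
        std::set<int> in = bb.UpExposedUses;
        for (int v : out)
            if (!bb.Defs.count(v))
                in.insert(v);
        bool changed = (out != bb.LiveOut) || (in != bb.LiveIn);
        bb.LiveOut = out;
        bb.LiveIn = in;
        return changed;
    }

In the BitVector version above, `NewLive.reset(BBS.Defs)` plays the role of the set difference and `|=` the role of the union.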
1911 : :
1912 : : // For debugging
1913 : 0 : JL_USED_FUNC static void dumpSafepointsForBBName(Function &F, State &S, const char *BBName) {
1914 [ # # ]: 0 : for (auto it : S.SafepointNumbering) {
1915 [ # # ]: 0 : if (it.first->getParent()->getName() == BBName) {
1916 : 0 : dbgs() << "Live at " << *it.first << "\n";
1917 : 0 : BitVector &LS = S.LiveSets[it.second];
1918 [ # # ]: 0 : for (int Idx = LS.find_first(); Idx >= 0; Idx = LS.find_next(Idx)) {
1919 : 0 : dbgs() << "\t";
1920 : 0 : S.ReversePtrNumbering[Idx]->printAsOperand(dbgs());
1921 : 0 : dbgs() << "\n";
1922 : : }
1923 : : }
1924 : : }
1925 : 0 : }
1926 : :
1927 : 4213610 : void LateLowerGCFrame::RefineLiveSet(BitVector &LS, State &S, const std::vector<int> &CalleeRoots)
1928 : : {
1929 : 8427220 : BitVector FullLS(S.MaxPtrNumber + 1, false);
1930 : 4213610 : FullLS |= LS;
1931 : : // First expand the live set according to the refinement map
1932 : : // so that we can see all the values that are effectively live.
1933 [ + + ]: 4766370 : for (auto Num: CalleeRoots) {
1934 : : // For callee rooted values, they are all kept alive at the safepoint.
1935 : : // Make sure they are marked (even though they probably are already)
1936 : : // so that other values can be refined to them.
1937 : 552758 : FullLS[Num] = 1;
1938 : : }
1939 : : bool changed;
1940 [ + + ]: 7209640 : do {
1941 : 7209640 : changed = false;
1942 [ + + ]: 766422000 : for (auto &kv: S.Refinements) {
1943 : 759213000 : int Num = kv.first;
1944 [ + - + + : 759213000 : if (Num < 0 || HasBitSet(FullLS, Num) || kv.second.empty())
+ + + + ]
1945 : 603722000 : continue;
1946 : 155491000 : bool live = true;
1947 [ + + ]: 293542000 : for (auto &refine: kv.second) {
1948 [ + + + + : 157505000 : if (refine < 0 || HasBitSet(FullLS, refine))
+ + ]
1949 : 138051000 : continue;
1950 : 19453900 : live = false;
1951 : 19453900 : break;
1952 : : }
1953 [ + + ]: 155491000 : if (live) {
1954 : 136037000 : changed = true;
1955 : 136037000 : FullLS[Num] = 1;
1956 : : }
1957 : : }
1958 : : } while (changed);
1959 : : // Now remove from the LiveSet all values that are kept alive by other objects.
1960 : : // This loop only mutates `LS`, which isn't read from in the loop body, so
1961 : : // a single pass is enough.
1962 [ + + ]: 12651500 : for (int Idx = LS.find_first(); Idx >= 0; Idx = LS.find_next(Idx)) {
1963 [ + + ]: 8437900 : if (!S.Refinements.count(Idx))
1964 : 4751740 : continue;
1965 : 3686160 : const auto &RefinedPtr = S.Refinements[Idx];
1966 [ + + ]: 3686160 : if (RefinedPtr.empty())
1967 : 2111130 : continue;
1968 : 1575030 : bool rooted = true;
1969 [ + + ]: 3116280 : for (auto RefPtr: RefinedPtr) {
1970 [ + + + + : 1586840 : if (RefPtr < 0 || HasBitSet(FullLS, RefPtr))
+ + ]
1971 : 1541250 : continue;
1972 : 45591 : rooted = false;
1973 : 45591 : break;
1974 : : }
1975 [ + + ]: 1575030 : if (rooted) {
1976 : 1529440 : LS[Idx] = 0;
1977 : : }
1978 : : }
1979 [ + + ]: 4766370 : for (auto Num: CalleeRoots) {
1980 : : // Now unmark all values that are rooted by the callee after
1981 : : // refining other values to them.
1982 : 552758 : LS[Num] = 0;
1983 : : }
1984 : 4213610 : }
1985 : :
1986 : 685627 : void LateLowerGCFrame::ComputeLiveSets(State &S) {
1987 : : // Iterate over all safe points. Add to live sets all those variables that
1988 : : // are now live across their parent block.
1989 [ + + ]: 4899240 : for (auto it : S.SafepointNumbering) {
1990 : 4213610 : int idx = it.second;
1991 : 4213610 : Instruction *Safepoint = it.first;
1992 : 4213610 : BasicBlock *BB = Safepoint->getParent();
1993 : 4213610 : BBState &BBS = S.BBStates[BB];
1994 : 8427220 : BitVector LiveAcross = BBS.LiveIn;
1995 : 4213610 : LiveAcross &= BBS.LiveOut;
1996 : 4213610 : BitVector &LS = S.LiveSets[idx];
1997 : 4213610 : LS |= LiveAcross;
1998 [ + + ]: 10624300 : for (int Live : S.LiveIfLiveOut[idx]) {
1999 [ + + ]: 6410710 : if (HasBitSet(BBS.LiveOut, Live))
2000 : 750905 : LS[Live] = 1;
2001 : : }
2002 : 4213610 : RefineLiveSet(LS, S, S.CalleeRoots[idx]);
2003 : : // If the function has GC preserves, figure out whether we need to
2004 : : // add in any extra live values.
2005 [ + + ]: 4213610 : if (!S.GCPreserves.empty()) {
2006 [ + + ]: 334444 : if (!S.DT) {
2007 : 7938 : S.DT = &GetDT();
2008 : : }
2009 [ + + ]: 1724590 : for (auto it2 : S.GCPreserves) {
2010 [ + + ]: 1390150 : if (!S.DT->dominates(it2.first, Safepoint))
2011 : 1227620 : continue;
2012 : 162525 : bool OutsideRange = false;
2013 [ + + ]: 486977 : for (const User *U : it2.first->users()) {
2014 : : // If this is dominated by an end, we don't need to add
2015 : : // the values to our live set.
2016 [ + + ]: 448462 : if (S.DT->dominates(cast<Instruction>(U), Safepoint)) {
2017 : 124010 : OutsideRange = true;
2018 : 124010 : break;
2019 : : }
2020 : : }
2021 [ + + ]: 162525 : if (OutsideRange)
2022 : 124010 : continue;
2023 [ + + ]: 65810 : for (unsigned Num : it2.second) {
2024 [ + + ]: 27295 : if (Num >= LS.size())
2025 : 2845 : LS.resize(Num + 1);
2026 : 27295 : LS[Num] = 1;
2027 : : }
2028 : : }
2029 : : }
2030 : : }
2031 : : // Compute the interference graph
2032 [ + + ]: 5740630 : for (int i = 0; i <= S.MaxPtrNumber; ++i) {
2033 : 10110000 : SetVector<int> Neighbors;
2034 : 10110000 : BitVector NeighborBits(S.MaxPtrNumber);
2035 [ + + ]: 1081180000 : for (auto it : S.SafepointNumbering) {
2036 : 1076130000 : const BitVector &LS = S.LiveSets[it.second];
2037 [ + + + + : 1076130000 : if ((unsigned)i >= LS.size() || !LS[i])
+ + ]
2038 : 1069620000 : continue;
2039 : 6508860 : NeighborBits |= LS;
2040 : : }
2041 [ + + ]: 16126600 : for (int Idx = NeighborBits.find_first(); Idx >= 0; Idx = NeighborBits.find_next(Idx)) {
2042 : : // We explicitly let i be a neighbor of itself, to distinguish
2043 : : // between being the only value live at a safepoint, vs not
2044 : : // being live at any safepoint.
2045 : 11071600 : Neighbors.insert(Idx);
2046 : : }
2047 : 5055000 : S.Neighbors.push_back(Neighbors);
2048 : : }
2049 : 685627 : }
2050 : :
2051 : : /* For chordal interference graphs, this class gives the vertices in a (reverse,
2052 : :  * depending on the definition) perfect elimination ordering, in such a way that
2053 : :  * greedy coloring gives an optimal coloring. Since our roots are in SSA form,
2054 : :  * the interference graph should be chordal.
2055 : : */
2056 : : struct PEOIterator {
2057 : : struct Element {
2058 : : unsigned weight;
2059 : : unsigned pos;
2060 : : };
2061 : : std::vector<Element> Elements;
2062 : : std::vector<std::vector<int>> Levels;
2063 : : const std::vector<SetVector<int>> &Neighbors;
2064 : 685627 : PEOIterator(const std::vector<SetVector<int>> &Neighbors) : Neighbors(Neighbors) {
2065 : : // Initialize State
2066 : 1371250 : std::vector<int> FirstLevel;
2067 [ + + ]: 5740630 : for (unsigned i = 0; i < Neighbors.size(); ++i) {
2068 : 5055000 : FirstLevel.push_back(i);
2069 : 5055000 : Element E{0, i};
2070 : 5055000 : Elements.push_back(E);
2071 : : }
2072 : 685627 : Levels.push_back(FirstLevel);
2073 : 685627 : }
2074 : 5740630 : int next() {
2075 : : // Find the element in the highest bucket
2076 : 5740630 : int NextElement = -1;
2077 [ + + + + : 11304400 : while (NextElement == -1 && !Levels.empty()) {
+ + ]
2078 : 5563790 : std::vector<int> &LastLevel = Levels.back();
2079 [ + + + + : 15293100 : while (NextElement == -1 && !LastLevel.empty()) {
+ + ]
2080 : 9729310 : NextElement = LastLevel.back();
2081 : 9729310 : LastLevel.pop_back();
2082 : : }
2083 [ + + ]: 5563790 : if (LastLevel.empty())
2084 : 1536420 : Levels.pop_back();
2085 : : }
2086 [ + + ]: 5740630 : if (NextElement == -1)
2087 : 685627 : return NextElement;
2088 : : // Make sure not to try to re-use this later.
2089 : 5055000 : Elements[NextElement].weight = (unsigned)-1;
2090 : : // Raise neighbors
2091 [ + + ]: 16126600 : for (int Neighbor : Neighbors[NextElement]) {
2092 [ + + ]: 11071600 : if (Neighbor == NextElement)
2093 : 1722970 : continue;
2094 : 9348610 : Element &NElement = Elements[Neighbor];
2095 : : // Already processed. Don't re-enqueue
2096 [ + + ]: 9348610 : if (NElement.weight == (unsigned)-1)
2097 : 4674310 : continue;
2098 : : // Kill old queue position
2099 : 4674310 : Levels[NElement.weight][NElement.pos] = -1;
2100 : : // Raise the neighbor to the next level.
2101 : 4674310 : NElement.weight += 1;
2102 [ + + ]: 4674310 : if (NElement.weight >= Levels.size())
2103 : 850795 : Levels.push_back(std::vector<int>{});
2104 : 4674310 : Levels[NElement.weight].push_back(Neighbor);
2105 : 4674310 : NElement.pos = Levels[NElement.weight].size()-1;
2106 : : }
2107 : : // As an enhancement, we might want to periodically compactify the whole
2108 : : // data structure. This could be done here.
2109 : 5055000 : return NextElement;
2110 : : }
2111 : : };
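
The point of producing a (reverse) perfect elimination ordering is that greedy coloring in that order is optimal on chordal graphs: each vertex's already-colored neighbors form a clique, so the smallest free color never exceeds the size of the largest clique. Below is a minimal stand-alone version of the greedy step over a plain adjacency list, for illustration only and not part of the pass; `toyGreedyColor` is hypothetical and <vector> is assumed to be available.

    // Colors vertices in the given order; `order` is meant to be the sequence
    // produced by PEOIterator::next(). Returns one color index per vertex.
    static std::vector<int> toyGreedyColor(const std::vector<std::vector<int>> &neighbors,
                                           const std::vector<int> &order)
    {
        std::vector<int> color(neighbors.size(), -1);
        for (int v : order) {
            std::vector<bool> used;
            for (int n : neighbors[v]) {
                int cn = (n == v) ? -1 : color[n];
                if (cn == -1)
                    continue;
                if (cn >= (int)used.size())
                    used.resize(cn + 1, false);
                used[cn] = true;
            }
            int c = 0;
            while (c < (int)used.size() && used[c])
                c++;
            color[v] = c; // smallest color not used by an already-colored neighbor
        }
        return color;
    }

ColorRoots below does essentially this with a BitVector of used colors, except that values live across returns_twice calls are pre-assigned fixed slots which the greedy loop then never hands out again.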
2112 : :
2113 : 0 : JL_USED_FUNC static void dumpColorAssignments(const State &S, std::vector<int> &Colors)
2114 : : {
2115 [ # # ]: 0 : for (unsigned i = 0; i < Colors.size(); ++i) {
2116 [ # # ]: 0 : if (Colors[i] == -1)
2117 : 0 : continue;
2118 : 0 : dbgs() << "\tValue ";
2119 : 0 : S.ReversePtrNumbering.at(i)->printAsOperand(dbgs());
2120 : 0 : dbgs() << " assigned color " << Colors[i] << "\n";
2121 : : }
2122 : 0 : }
2123 : :
2124 : 685627 : std::vector<int> LateLowerGCFrame::ColorRoots(const State &S) {
2125 : 685627 : std::vector<int> Colors;
2126 : 685627 : Colors.resize(S.MaxPtrNumber + 1, -1);
2127 : 1371250 : PEOIterator Ordering(S.Neighbors);
2128 : 685627 : int PreAssignedColors = 0;
2129 : : /* First assign permanent slots to things that need them due
2130 : : to returns_twice */
2131 [ + + ]: 720442 : for (auto it : S.ReturnsTwice) {
2132 : 34815 : int Num = S.SafepointNumbering.at(it);
2133 : 34815 : const BitVector &LS = S.LiveSets[Num];
2134 [ + + ]: 173525 : for (int Idx = LS.find_first(); Idx >= 0; Idx = LS.find_next(Idx)) {
2135 [ + + ]: 138710 : if (Colors[Idx] == -1)
2136 : 107233 : Colors[Idx] = PreAssignedColors++;
2137 : : }
2138 : : }
2139 : : /* Greedy coloring */
2140 : 685627 : int MaxAssignedColor = -1;
2141 : 685627 : int ActiveElement = 1;
2142 : 1371250 : BitVector UsedColors;
2143 [ + + ]: 5740630 : while ((ActiveElement = Ordering.next()) != -1) {
2144 [ + + ]: 5055000 : if (Colors[ActiveElement] != -1)
2145 : 107233 : continue;
2146 : 4947770 : UsedColors.resize(MaxAssignedColor + 2, false);
2147 : 4947770 : UsedColors.reset();
2148 [ + + ]: 4947770 : if (S.Neighbors[ActiveElement].empty()) {
2149 : : // No need to color a value not live at any safe point
2150 : 3332030 : continue;
2151 : : }
2152 [ + + ]: 10221900 : for (int Neighbor : S.Neighbors[ActiveElement]) {
2153 : 8606160 : int NeighborColor = Colors[Neighbor];
2154 [ + + ]: 8606160 : if (NeighborColor == -1)
2155 : 4509020 : continue;
2156 [ + + ]: 4097130 : if (NeighborColor < PreAssignedColors)
2157 : 1203840 : continue;
2158 : 2893290 : UsedColors[NeighborColor - PreAssignedColors] = 1;
2159 : : }
2160 : 1615740 : int NewColor = UsedColors.flip().find_first();
2161 [ + + ]: 1615740 : if (NewColor > MaxAssignedColor)
2162 : 484190 : MaxAssignedColor = NewColor;
2163 : 1615740 : NewColor += PreAssignedColors;
2164 : 1615740 : Colors[ActiveElement] = NewColor;
2165 : : }
2166 : 685627 : return Colors;
2167 : : }
2168 : :
2169 : : // Size of T is assumed to be `sizeof(void*)`
2170 : 754756 : Value *LateLowerGCFrame::EmitTagPtr(IRBuilder<> &builder, Type *T, Value *V)
2171 : : {
2172 : 754756 : auto T_size = getSizeTy(T->getContext());
2173 [ + + - + ]: 754756 : assert(T == T_size || isa<PointerType>(T));
2174 : 754756 : auto TV = cast<PointerType>(V->getType());
2175 : 754756 : auto cast = builder.CreateBitCast(V, T->getPointerTo(TV->getAddressSpace()));
2176 : 754756 : return builder.CreateInBoundsGEP(T, cast, ConstantInt::get(T_size, -1));
2177 : : }
2178 : :
2179 : 349457 : Value *LateLowerGCFrame::EmitLoadTag(IRBuilder<> &builder, Value *V)
2180 : : {
2181 : 349457 : auto T_size = getSizeTy(builder.getContext());
2182 : 349457 : auto addr = EmitTagPtr(builder, T_size, V);
2183 : 349457 : LoadInst *load = builder.CreateAlignedLoad(T_size, addr, Align(sizeof(size_t)));
2184 : 349457 : load->setOrdering(AtomicOrdering::Unordered);
2185 : 349457 : load->setMetadata(LLVMContext::MD_tbaa, tbaa_tag);
2186 : 349457 : MDBuilder MDB(load->getContext());
2187 : 349457 : auto *NullInt = ConstantInt::get(T_size, 0);
2188 : : // We can be sure that the tag is larger than the page size.
2189 : : // Hopefully this is enough to convince LLVM that the value is still not NULL
2190 : : // after masking off the tag bits.
2191 : 349457 : auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(T_size, 4096));
2192 : 349457 : load->setMetadata(LLVMContext::MD_range, MDB.createRange(NonNullInt, NullInt));
2193 : 349457 : return load;
2194 : : }
2195 : :
2196 : : // Enable this optimization only on LLVM 4.0+ since this causes LLVM to optimize a
2197 : : // constant store loop to produce a `memset_pattern16` with a global variable
2198 : : // that's initialized by `addrspacecast`. Such a global variable is not supported by the backend.
2199 : : // This is not a problem on 4.0+ since that transformation (in loop-idiom) is disabled
2200 : : // for NI pointers.
2201 : 75806 : static SmallVector<int, 1> *FindRefinements(Value *V, State *S)
2202 : : {
2203 [ - + ]: 75806 : if (!S)
2204 : 0 : return nullptr;
2205 : 75806 : auto it = S->AllPtrNumbering.find(V);
2206 [ - + ]: 75806 : if (it == S->AllPtrNumbering.end())
2207 : 0 : return nullptr;
2208 : 75806 : auto rit = S->Refinements.find(it->second);
2209 [ + + + + ]: 75806 : return rit != S->Refinements.end() && !rit->second.empty() ? &rit->second : nullptr;
2210 : : }
2211 : :
2212 : 83107 : static bool IsPermRooted(Value *V, State *S)
2213 : : {
2214 [ + + ]: 83107 : if (isa<Constant>(V))
2215 : 7301 : return true;
2216 [ + + ]: 75806 : if (auto *RefinePtr = FindRefinements(V, S))
2217 [ + + + + ]: 4439 : return RefinePtr->size() == 1 && (*RefinePtr)[0] == -2;
2218 : 71367 : return false;
2219 : : }
2220 : :
2221 : 10127800 : static inline void UpdatePtrNumbering(Value *From, Value *To, State *S)
2222 : : {
2223 [ + + ]: 10127800 : if (!S)
2224 : 7952850 : return;
2225 : 10120000 : auto it = S->AllPtrNumbering.find(From);
2226 [ + + ]: 10120000 : if (it == S->AllPtrNumbering.end())
2227 : 7945090 : return;
2228 : 2174900 : auto Num = it->second;
2229 : 2174900 : S->AllPtrNumbering.erase(it);
2230 [ + - ]: 2174900 : if (To) {
2231 : 2174900 : S->AllPtrNumbering[To] = Num;
2232 : : }
2233 : : }
2234 : :
2235 : 3004380 : MDNode *createMutableTBAAAccessTag(MDNode *Tag) {
2236 : 3004380 : return MDBuilder(Tag->getContext()).createMutableTBAAAccessTag(Tag);
2237 : : }
2238 : :
2239 : :
2240 : 689604 : bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
2241 : 689604 : auto T_int32 = Type::getInt32Ty(F.getContext());
2242 : 689604 : auto T_size = getSizeTy(F.getContext());
2243 : 689604 : bool ChangesMade = false;
2244 : : // We create one alloca for all the jlcall frames that haven't been processed
2245 : : // yet. LLVM would merge them anyway later, so might as well save it a bit
2246 : : // of work
2247 : 689604 : size_t maxframeargs = 0;
2248 : 689604 : Instruction *StartOff = &*(F.getEntryBlock().begin());
2249 : 689604 : PointerType *T_pprjlvalue = nullptr;
2250 : 689604 : AllocaInst *Frame = nullptr;
2251 [ + - ]: 689604 : if (T_prjlvalue) {
2252 : 689604 : T_pprjlvalue = T_prjlvalue->getPointerTo();
2253 : 1379210 : Frame = new AllocaInst(T_prjlvalue, 0,
2254 : 1379210 : ConstantInt::get(T_int32, maxframeargs), "", StartOff);
2255 : : }
2256 : 689604 : std::vector<CallInst*> write_barriers;
2257 [ + + ]: 7028820 : for (BasicBlock &BB : F) {
2258 [ + + ]: 67339400 : for (auto it = BB.begin(); it != BB.end();) {
2259 : 61000200 : Instruction *I = &*it;
2260 : : // strip all constant alias information, as it might depend on the gc having
2261 : : // preserved a gc root, which stops being true after this pass (#32215)
2262 : : // similar to RewriteStatepointsForGC::stripNonValidData, but less aggressive
2263 [ + + ]: 61000200 : if (I->getMetadata(LLVMContext::MD_invariant_load))
2264 : 2962460 : I->setMetadata(LLVMContext::MD_invariant_load, NULL);
2265 [ + + ]: 61000200 : if (MDNode *TBAA = I->getMetadata(LLVMContext::MD_tbaa)) {
2266 [ + + + - : 7806460 : if (TBAA->getNumOperands() == 4 && isTBAA(TBAA, {"jtbaa_const"})) {
+ + ]
2267 : 3004380 : MDNode *MutableTBAA = createMutableTBAAAccessTag(TBAA);
2268 [ + - ]: 3004380 : if (MutableTBAA != TBAA)
2269 : 3004380 : I->setMetadata(LLVMContext::MD_tbaa, MutableTBAA);
2270 : : }
2271 : : }
2272 : : // FCA chains created by SROA start with an undef value. If the type
2273 : : // contains a tracked pointer, that can lead to a partial initialisation,
2274 : : // and LateLower might have inserted an extractvalue of an undef field.
2275 : : // Fix this by changing the chain to start with a zero-init instead.
2276 [ + + ]: 61000200 : if (auto *IV = dyn_cast<InsertValueInst>(*&it)) {
2277 : 184425 : Value *SourceAggregate = IV->getAggregateOperand();
2278 [ + + ]: 184425 : if (isa<UndefValue>(SourceAggregate)) {
2279 : 38071 : IV->setOperand(IV->getAggregateOperandIndex(), ConstantAggregateZero::get(IV->getType()));
2280 : 38071 : ChangesMade = true;
2281 : : }
2282 : : }
2283 : :
2284 : 61000200 : auto *CI = dyn_cast<CallInst>(&*it);
2285 [ + + ]: 61000200 : if (!CI) {
2286 : 50746000 : ++it;
2287 : 50824800 : continue;
2288 : : }
2289 : 10254100 : Value *callee = CI->getCalledOperand();
2290 [ + - + + : 10254100 : if (callee && (callee == gc_flush_func || callee == gc_preserve_begin_func
+ + ]
2291 [ + + ]: 10224500 : || callee == gc_preserve_end_func)) {
2292 : : /* No replacement */
2293 [ + + + + ]: 10179900 : } else if (pointer_from_objref_func != nullptr && callee == pointer_from_objref_func) {
2294 : 155509 : auto *obj = CI->getOperand(0);
2295 : 155509 : auto *ASCI = new AddrSpaceCastInst(obj, JuliaType::get_pjlvalue_ty(obj->getContext()), "", CI);
2296 : 155509 : ASCI->takeName(CI);
2297 : 155509 : CI->replaceAllUsesWith(ASCI);
2298 : 155509 : UpdatePtrNumbering(CI, ASCI, S);
2299 [ + + + + ]: 10024400 : } else if (alloc_obj_func && callee == alloc_obj_func) {
2300 [ - + ]: 405299 : assert(CI->arg_size() == 3);
2301 : :
2302 : : // Initialize an IR builder.
2303 : 810598 : IRBuilder<> builder(CI);
2304 : 405299 : builder.SetCurrentDebugLocation(CI->getDebugLoc());
2305 : :
2306 : : // Create a call to the `julia.gc_alloc_bytes` intrinsic, which is like
2307 : : // `julia.gc_alloc_obj` except it doesn't set the tag.
2308 : 405299 : auto allocBytesIntrinsic = getOrDeclare(jl_intrinsics::GCAllocBytes);
2309 : 405299 : auto ptlsLoad = get_current_ptls_from_task(builder, CI->getArgOperand(0), tbaa_gcframe);
2310 : 405299 : auto ptls = builder.CreateBitCast(ptlsLoad, Type::getInt8PtrTy(builder.getContext()));
2311 : 810598 : auto newI = builder.CreateCall(
2312 : : allocBytesIntrinsic,
2313 : : {
2314 : : ptls,
2315 : 405299 : builder.CreateIntCast(
2316 : : CI->getArgOperand(1),
2317 : : allocBytesIntrinsic->getFunctionType()->getParamType(1),
2318 : : false)
2319 : : });
2320 : 405299 : newI->takeName(CI);
2321 : :
2322 : : // LLVM's alignment/bit check is not happy about the addrspacecast and refuses
2323 : : // to remove the write barrier because of it.
2324 : : // We pretty much only load using `T_size`, so try our best to strip
2325 : : // as many casts as possible.
2326 : 405299 : auto tag = CI->getArgOperand(2)->stripPointerCastsAndAliases();
2327 [ + + ]: 405299 : if (auto C = dyn_cast<ConstantExpr>(tag)) {
2328 [ + - ]: 384340 : if (C->getOpcode() == Instruction::IntToPtr) {
2329 : 384340 : tag = C->getOperand(0);
2330 : : }
2331 : : }
2332 [ + + ]: 20959 : else if (auto LI = dyn_cast<LoadInst>(tag)) {
2333 : : // Make sure the load is correctly marked as aligned
2334 : : // since LLVM might have removed it.
2335 : : // We can't do this in general since the loaded value might not be
2336 : : // a type tag in other branches.
2337 : : // However, it should be safe for us to do this on const globals
2338 : : // which should be the important cases as well.
2339 : 20958 : bool task_local = false;
2340 [ + - - + : 20958 : if (isLoadFromConstGV(LI, task_local) && getLoadValueAlign(LI) < 16) {
- + ]
2341 : 0 : Type *T_int64 = Type::getInt64Ty(LI->getContext());
2342 : 0 : auto op = ConstantAsMetadata::get(ConstantInt::get(T_int64, 16));
2343 : 0 : LI->setMetadata(LLVMContext::MD_align,
2344 : 0 : MDNode::get(LI->getContext(), { op }));
2345 : : }
2346 : : }
2347 : : // As a last resort, if we didn't manage to strip down the tag
2348 : : // for LLVM, emit an alignment assumption.
2349 : 405299 : auto tag_type = tag->getType();
2350 [ + + ]: 405299 : if (tag_type->isPointerTy()) {
2351 : 20959 : auto &DL = CI->getModule()->getDataLayout();
2352 : 20959 : auto align = tag->getPointerAlignment(DL).value();
2353 [ - + ]: 20959 : if (align < 16) {
2354 : : // On 5 <= LLVM < 12, it is illegal to call this on
2355 : : // a non-integral pointer. This relies on stripping the
2356 : : // non-integralness from the datalayout before this pass.
2357 : 0 : builder.CreateAlignmentAssumption(DL, tag, 16);
2358 : : }
2359 : : }
2360 : : // Set the tag.
2361 : 405299 : StoreInst *store = builder.CreateAlignedStore(
2362 : 405299 : tag, EmitTagPtr(builder, tag_type, newI), Align(sizeof(size_t)));
2363 : 405299 : store->setOrdering(AtomicOrdering::Unordered);
2364 : 405299 : store->setMetadata(LLVMContext::MD_tbaa, tbaa_tag);
2365 : :
2366 : : // Replace uses of the call to `julia.gc_alloc_obj` with the call to
2367 : : // `julia.gc_alloc_bytes`.
2368 : 405299 : CI->replaceAllUsesWith(newI);
2369 : :
2370 : : // Update the pointer numbering.
2371 : 810598 : UpdatePtrNumbering(CI, newI, S);
2372 [ + + + + ]: 9619100 : } else if (typeof_func && callee == typeof_func) {
2373 [ - + ]: 190173 : assert(CI->arg_size() == 1);
2374 : 380346 : IRBuilder<> builder(CI);
2375 : 190173 : builder.SetCurrentDebugLocation(CI->getDebugLoc());
2376 : 190173 : auto tag = EmitLoadTag(builder, CI->getArgOperand(0));
2377 : 190173 : auto masked = builder.CreateAnd(tag, ConstantInt::get(T_size, ~(uintptr_t)15));
2378 : 380346 : auto typ = builder.CreateAddrSpaceCast(builder.CreateIntToPtr(masked, JuliaType::get_pjlvalue_ty(masked->getContext())),
2379 : 190173 : T_prjlvalue);
2380 : 190173 : typ->takeName(CI);
2381 : 190173 : CI->replaceAllUsesWith(typ);
2382 : 380346 : UpdatePtrNumbering(CI, typ, S);
2383 [ + + + + ]: 9428920 : } else if ((write_barrier_func && callee == write_barrier_func) ||
2384 [ + + + + ]: 9352430 : (write_barrier_binding_func && callee == write_barrier_binding_func)) {
2385 : : // The replacement for this requires creating new BasicBlocks,
2386 : : // which would invalidate this iteration. Queue them all to be replaced later.
2387 [ - + ]: 78755 : assert(CI->arg_size() >= 1);
2388 : 78755 : write_barriers.push_back(CI);
2389 : 78755 : ChangesMade = true;
2390 : 78755 : ++it;
2391 : 78755 : continue;
2392 [ + + + + ]: 9350170 : } else if ((call_func && callee == call_func) ||
2393 [ + + + + ]: 8796090 : (call2_func && callee == call2_func)) {
2394 [ - + ]: 557351 : assert(T_prjlvalue);
2395 : 557351 : size_t nargs = CI->arg_size();
2396 : 557351 : size_t nframeargs = nargs-1;
2397 [ + + ]: 557351 : if (callee == call_func)
2398 : 554077 : nframeargs -= 1;
2399 [ + - ]: 3274 : else if (callee == call2_func)
2400 : 3274 : nframeargs -= 2;
2401 : 1114700 : SmallVector<Value*, 4> ReplacementArgs;
2402 : 557351 : auto arg_it = CI->arg_begin();
2403 [ - + ]: 557351 : assert(arg_it != CI->arg_end());
2404 : 557351 : Value *new_callee = *(arg_it++);
2405 [ - + ]: 557351 : assert(arg_it != CI->arg_end());
2406 : 557351 : ReplacementArgs.push_back(*(arg_it++));
2407 [ + + ]: 557351 : if (callee == call2_func) {
2408 [ - + ]: 3274 : assert(arg_it != CI->arg_end());
2409 : 3274 : ReplacementArgs.push_back(*(arg_it++));
2410 : : }
2411 : 557351 : maxframeargs = std::max(maxframeargs, nframeargs);
2412 : 557351 : int slot = 0;
2413 : 1114700 : IRBuilder<> Builder (CI);
2414 [ + + ]: 1951860 : for (; arg_it != CI->arg_end(); ++arg_it) {
2415 : : // Julia emits IR with proper pointer types here, but because
2416 : : // the julia.call signature is varargs, the optimizer is allowed
2417 : : // to rewrite pointee types. It'll go away with opaque pointer
2418 : : // types anyway.
2419 : 2789010 : Builder.CreateAlignedStore(Builder.CreateBitCast(*arg_it, T_prjlvalue),
2420 : 1394500 : Builder.CreateInBoundsGEP(T_prjlvalue, Frame, ConstantInt::get(T_int32, slot++)),
2421 : 2789010 : Align(sizeof(void*)));
2422 : : }
2423 [ + + ]: 563496 : ReplacementArgs.push_back(nframeargs == 0 ?
2424 : 6145 : (llvm::Value*)ConstantPointerNull::get(T_pprjlvalue) :
2425 : : (llvm::Value*)Frame);
2426 : 557351 : ReplacementArgs.push_back(ConstantInt::get(T_int32, nframeargs));
2427 [ + + ]: 557351 : if (callee == call2_func) {
2428 : : // move trailing arg to the end now
2429 : 3274 : Value *front = ReplacementArgs.front();
2430 : 3274 : ReplacementArgs.erase(ReplacementArgs.begin());
2431 : 3274 : ReplacementArgs.push_back(front);
2432 : : }
2433 [ + + ]: 557351 : FunctionType *FTy = callee == call2_func ? JuliaType::get_jlfunc2_ty(CI->getContext()) : JuliaType::get_jlfunc_ty(CI->getContext());
2434 : 557351 : CallInst *NewCall = CallInst::Create(FTy, new_callee, ReplacementArgs, "", CI);
2435 : 557351 : NewCall->setTailCallKind(CI->getTailCallKind());
2436 : 557351 : auto callattrs = CI->getAttributes();
2437 : 557351 : callattrs = AttributeList::get(CI->getContext(), getFnAttrs(callattrs), getRetAttrs(callattrs), {});
2438 [ + - ]: 557351 : if (auto new_callee = CI->getCalledFunction()) // get the parameter attributes from the function target (if possible)
2439 : 557351 : callattrs = AttributeList::get(CI->getContext(), {callattrs, new_callee->getAttributes()});
2440 : 557351 : NewCall->setAttributes(callattrs);
2441 : 557351 : NewCall->takeName(CI);
2442 : 557351 : NewCall->copyMetadata(*CI);
2443 : 557351 : CI->replaceAllUsesWith(NewCall);
2444 : 1114700 : UpdatePtrNumbering(CI, NewCall, S);
2445 [ - + ]: 8792820 : } else if (CI->arg_size() == CI->getNumOperands()) {
2446 : : /* No operand bundle to lower */
2447 : 0 : ++it;
2448 : 0 : continue;
2449 : : } else {
2450 : 8792820 : CallInst *NewCall = CallInst::Create(CI, None, CI);
2451 : 8792820 : NewCall->takeName(CI);
2452 : 8792820 : NewCall->copyMetadata(*CI);
2453 : 8792820 : CI->replaceAllUsesWith(NewCall);
2454 : 8792820 : UpdatePtrNumbering(CI, NewCall, S);
2455 : : }
2456 [ + + ]: 10175400 : if (!CI->use_empty()) {
2457 : 26599 : CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
2458 : 26599 : UpdatePtrNumbering(CI, nullptr, S);
2459 : : }
2460 : 10175400 : it = CI->eraseFromParent();
2461 : 10175400 : ChangesMade = true;
2462 : : }
2463 : : }
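 : :
 : : // To make the lowerings in the loop above concrete, the emitted IR has
 : : // roughly the following shape. This is only a sketch: value names, address
 : : // spaces and intrinsic signatures are illustrative; the real ones come from
 : : // codegen_shared.h and llvm-pass-helpers.cpp.
 : : //
 : : //   ; julia.gc_alloc_obj -> julia.gc_alloc_bytes plus an explicit tag store
 : : //   %obj  = call {} addrspace(10)* @julia.gc_alloc_bytes(<ptls>, i64 %sz)
 : : //   %tagp = <address of the word just before %obj>            ; EmitTagPtr
 : : //   store atomic i64 %tag, i64 addrspace(10)* %tagp unordered, align 8, !tbaa !tbaa_tag
 : : //
 : : //   ; julia.typeof -> mask the GC bits off the loaded header word
 : : //   %hdr    = <load of the tag word of the argument>          ; EmitLoadTag
 : : //   %masked = and i64 %hdr, -16
 : : //   %typ0   = inttoptr i64 %masked to {}*
 : : //   %typ    = addrspacecast {}* %typ0 to {} addrspace(10)*
 : : //
 : : //   ; julia.call -> spill the arguments into the frame alloca, then call the
 : : //   ; C entry point, roughly jl_value_t *(*)(jl_value_t*, jl_value_t**, uint32_t)
 : : //   store {} addrspace(10)* %arg0, {} addrspace(10)** <frame slot 0>
 : : //   store {} addrspace(10)* %arg1, {} addrspace(10)** <frame slot 1>
 : : //   %ret = call {} addrspace(10)* %fptr({} addrspace(10)* %f,
 : : //                                       {} addrspace(10)** %frame, i32 2)
 : :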
2464 [ + + ]: 768359 : for (auto CI : write_barriers) {
2465 : 78755 : auto parent = CI->getArgOperand(0);
2466 [ + + ]: 78755 : if (std::all_of(CI->op_begin() + 1, CI->op_end(),
2467 [ + - + + ]: 83107 : [parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) {
2468 : 4138 : CI->eraseFromParent();
2469 : 4138 : continue;
2470 : : }
2471 [ - + ]: 74617 : if (CFGModified) {
2472 : 0 : *CFGModified = true;
2473 : : }
2474 : 149234 : IRBuilder<> builder(CI);
2475 : 74617 : builder.SetCurrentDebugLocation(CI->getDebugLoc());
2476 : 74617 : auto parBits = builder.CreateAnd(EmitLoadTag(builder, parent), 3);
2477 : 74617 : auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, 3));
2478 : 74617 : auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false);
2479 : 74617 : builder.SetInsertPoint(mayTrigTerm);
2480 : 74617 : Value *anyChldNotMarked = NULL;
2481 [ + + ]: 159284 : for (unsigned i = 1; i < CI->arg_size(); i++) {
2482 : 84667 : Value *child = CI->getArgOperand(i);
2483 : 84667 : Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, child), 1);
2484 : 84667 : Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0));
2485 [ + + ]: 84667 : anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked;
2486 : : }
2487 [ - + ]: 74617 : assert(anyChldNotMarked); // handled by all_of test above
2488 : 74617 : MDBuilder MDB(parent->getContext());
2489 : 149234 : SmallVector<uint32_t, 2> Weights{1, 9};
2490 : 74617 : auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false,
2491 : : MDB.createBranchWeights(Weights));
2492 : 74617 : builder.SetInsertPoint(trigTerm);
2493 [ + + ]: 74617 : if (CI->getCalledOperand() == write_barrier_func) {
2494 : 72769 : builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent);
2495 : : }
2496 [ + - ]: 1848 : else if (CI->getCalledOperand() == write_barrier_binding_func) {
2497 : 1848 : builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCBinding), parent);
2498 : : }
2499 : : else {
2500 : 0 : assert(false);
2501 : : }
2502 : 74617 : CI->eraseFromParent();
2503 : : }
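 : :
 : : // Schematically, the expansion built above looks as follows (branch labels
 : : // and intrinsic names are approximate; see jl_intrinsics in
 : : // llvm-pass-helpers.cpp for the exact declarations):
 : : //
 : : //   %ptag  = <load tag word of %parent>               ; EmitLoadTag
 : : //   %pbits = and i64 %ptag, 3
 : : //   %pold  = icmp eq i64 %pbits, 3                    ; parent old and marked?
 : : //   br i1 %pold, label %maytrig, label %cont
 : : // maytrig:
 : : //   %cbit  = and i64 <load tag word of child>, 1
 : : //   %notmk = icmp eq i64 %cbit, 0                     ; or'ed over all children
 : : //   br i1 %notmk, label %trig, label %cont, !prof <weights 1:9>
 : : // trig:
 : : //   call void @julia.queue_gc_root({} addrspace(10)* %parent)
 : : //   ; (@julia.queue_gc_binding for the binding write barrier)
 : : //   br label %cont
 : :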
2504 [ + + + - ]: 689604 : if (maxframeargs == 0 && Frame) {
2505 : 563567 : Frame->eraseFromParent();
2506 : : }
2507 [ + - ]: 126037 : else if (Frame) {
2508 : 126037 : Frame->setOperand(0, ConstantInt::get(T_int32, maxframeargs));
2509 : : }
2510 : 689604 : return ChangesMade;
2511 : : }
2512 : :
2513 : 2158300 : static void AddInPredLiveOuts(BasicBlock *BB, BitVector &LiveIn, State &S)
2514 : : {
2515 : 2158300 : bool First = true;
2516 : 2158300 : std::set<BasicBlock *> Visited;
2517 : 2158300 : std::vector<BasicBlock *> WorkList;
2518 : 2158300 : WorkList.push_back(BB);
2519 [ + + ]: 26108400 : while (!WorkList.empty()) {
2520 : 24622600 : BB = &*WorkList.back();
2521 : 24622600 : WorkList.pop_back();
2522 : : // Nothing is live at function entry
2523 [ + + ]: 24622600 : if (BB == &S.F->getEntryBlock()) {
2524 : 662033 : LiveIn.reset();
2525 : 662033 : return;
2526 : : }
2527 [ + + ]: 55720100 : for (BasicBlock *Pred : predecessors(BB)) {
2528 [ + + ]: 31770000 : if (!Visited.insert(Pred).second)
2529 : 5665010 : continue;
2530 [ + + ]: 26105000 : if (!S.BBStates[Pred].HasSafepoint) {
2531 : 23166600 : WorkList.push_back(Pred);
2532 : 23166600 : continue;
2533 : : } else {
2534 : 2938320 : int LastSP = S.BBStates[Pred].Safepoints.front();
2535 [ + + ]: 2938320 : if (First) {
2536 : 1613940 : LiveIn |= S.LiveSets[LastSP];
2537 : 1613940 : First = false;
2538 : : } else {
2539 : 1324380 : LiveIn &= S.LiveSets[LastSP];
2540 : : }
2541 [ + + ]: 2938320 : if (LiveIn.empty()) // Just a compiler performance optimization
2542 : 10501 : return;
2543 : : }
2544 : : }
2545 : : }
2546 : : }
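 : :
 : : // A small worked example of the walk above, on a hypothetical CFG where the
 : : // numbers stand for pointer Defs:
 : : //
 : : //     A: safepoint, live {1,2}      B: safepoint, live {2,3}
 : : //           \                          /
 : : //            C: no safepoint          /
 : : //             \                      /
 : : //              +-------> D (the queried block)
 : : //
 : : // Starting from D, the worklist visits A (through the safepoint-free C) and
 : : // B, and intersects the live sets at their last safepoints:
 : : // {1,2} & {2,3} = {2}. Only pointer 2 is guaranteed to already have been
 : : // stored to its GC frame slot on every path into D, so later stores for it
 : : // can be skipped. If any path reaches the entry block without crossing a
 : : // safepoint, LiveIn is cleared: nothing may be assumed to be stored.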
2547 : :
2548 : 1877160 : void LateLowerGCFrame::PlaceGCFrameStore(State &S, unsigned R, unsigned MinColorRoot,
2549 : : const std::vector<int> &Colors, Value *GCFrame,
2550 : : Instruction *InsertBefore) {
2551 : : // Get the slot address.
2552 : 3754310 : auto slotAddress = CallInst::Create(
2553 : : getOrDeclare(jl_intrinsics::getGCFrameSlot),
2554 : 1877160 : {GCFrame, ConstantInt::get(Type::getInt32Ty(InsertBefore->getContext()), Colors[R] + MinColorRoot)},
2555 : : "", InsertBefore);
2556 : :
2557 : 1877160 : Value *Val = GetPtrForNumber(S, R, InsertBefore);
2558 : : // Pointee types don't have semantics, so the optimizer is
2559 : : // free to rewrite them if convenient. We need to change the
2560 : : // type back here for the store.
2561 [ + + ]: 1877160 : if (Val->getType() != T_prjlvalue)
2562 : 4302 : Val = new BitCastInst(Val, T_prjlvalue, "", InsertBefore);
2563 : 1877160 : new StoreInst(Val, slotAddress, InsertBefore);
2564 : 1877160 : }
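 : :
 : : // The two instructions created above amount to the following sketch (the
 : : // slot intrinsic is resolved to a concrete frame address later, by
 : : // llvm-final-gc-lowering):
 : : //
 : : //   %slot = call {} addrspace(10)** @julia.get_gc_frame_slot(
 : : //               {} addrspace(10)** %gcframe, i32 <Colors[R] + MinColorRoot>)
 : : //   store {} addrspace(10)* %val, {} addrspace(10)** %slot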
2565 : :
2566 : 279031 : void LateLowerGCFrame::PlaceGCFrameStores(State &S, unsigned MinColorRoot,
2567 : : const std::vector<int> &Colors, Value *GCFrame)
2568 : : {
2569 [ + + ]: 5446840 : for (auto &BB : *S.F) {
2570 : 5167800 : const BBState &BBS = S.BBStates[&BB];
2571 [ + + ]: 5167800 : if (!BBS.HasSafepoint) {
2572 : 3009510 : continue;
2573 : : }
2574 : 4316590 : BitVector LiveIn;
2575 : 2158300 : AddInPredLiveOuts(&BB, LiveIn, S);
2576 : 2158300 : const BitVector *LastLive = &LiveIn;
2577 : 5761310 : for(auto rit = BBS.Safepoints.rbegin();
2578 [ + + ]: 5761310 : rit != BBS.Safepoints.rend(); ++rit ) {
2579 : 3603020 : const BitVector &NowLive = S.LiveSets[*rit];
2580 [ + + ]: 10111900 : for (int Idx = NowLive.find_first(); Idx >= 0; Idx = NowLive.find_next(Idx)) {
2581 [ + + ]: 6508860 : if (!HasBitSet(*LastLive, Idx)) {
2582 : 1877160 : PlaceGCFrameStore(S, Idx, MinColorRoot, Colors, GCFrame,
2583 : 1877160 : S.ReverseSafepointNumbering[*rit]);
2584 : : }
2585 : : }
2586 : 3603020 : LastLive = &NowLive;
2587 : : }
2588 : : }
2589 : 279031 : }
2590 : :
2591 : 685627 : void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State &S, std::map<Value *, std::pair<int, int>>) {
2592 : 685627 : auto F = S.F;
2593 : 685627 : auto T_int32 = Type::getInt32Ty(F->getContext());
2594 : 685627 : int MaxColor = -1;
2595 [ + + ]: 5740630 : for (auto C : Colors)
2596 [ + + ]: 5055000 : if (C > MaxColor)
2597 : 321414 : MaxColor = C;
2598 : :
2599 : : // Insert instructions for the actual gc frame
2600 [ + + + + + + + + + + ]: 685627 : if (MaxColor != -1 || !S.Allocas.empty() || !S.ArrayAllocas.empty() || !S.TrackedStores.empty()) {
2601 : : // Create and push a GC frame.
2602 : 558062 : auto gcframe = CallInst::Create(
2603 : : getOrDeclare(jl_intrinsics::newGCFrame),
2604 : 279031 : {ConstantInt::get(T_int32, 0)},
2605 : : "gcframe");
2606 : 279031 : gcframe->insertBefore(&*F->getEntryBlock().begin());
2607 : :
2608 : 558062 : auto pushGcframe = CallInst::Create(
2609 : : getOrDeclare(jl_intrinsics::pushGCFrame),
2610 : 279031 : {gcframe, ConstantInt::get(T_int32, 0)});
2611 : 279031 : pushGcframe->insertAfter(pgcstack);
2612 : :
2613 : : // Replace Allocas
2614 : 279031 : unsigned AllocaSlot = 2; // first two words are metadata
2615 : 1374180 : auto replace_alloca = [this, gcframe, &AllocaSlot, T_int32](AllocaInst *&AI) {
2616 : : // Pick a slot for the alloca.
2617 : 343546 : unsigned align = AI->getAlignment() / sizeof(void*); // TODO: use DataLayout pointer size
2618 [ - + ]: 343546 : assert(align <= 16 / sizeof(void*) && "Alignment exceeds llvm-final-gc-lowering abilities");
2619 [ - + ]: 343546 : if (align > 1)
2620 : 0 : AllocaSlot = LLT_ALIGN(AllocaSlot, align);
2621 : 687092 : Instruction *slotAddress = CallInst::Create(
2622 : : getOrDeclare(jl_intrinsics::getGCFrameSlot),
2623 : 343546 : {gcframe, ConstantInt::get(T_int32, AllocaSlot - 2)});
2624 : 343546 : slotAddress->insertAfter(gcframe);
2625 : 343546 : slotAddress->takeName(AI);
2626 : :
2627 : : // Check for lifetime intrinsics on this alloca; we can't keep them
2628 : : // because we're changing the semantics.
2629 : 343546 : std::vector<CallInst*> ToDelete;
2630 : 343546 : RecursivelyVisit<IntrinsicInst>([&](Use &VU) {
2631 : 22790 : IntrinsicInst *II = cast<IntrinsicInst>(VU.getUser());
2632 [ + - + - : 45580 : if ((II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ - ]
2633 : 22790 : II->getIntrinsicID() != Intrinsic::lifetime_end))
2634 : 22790 : return;
2635 : 0 : ToDelete.push_back(II);
2636 : : }, AI);
2637 [ - + ]: 343546 : for (CallInst *II : ToDelete) {
2638 : 0 : II->eraseFromParent();
2639 : : }
2640 [ + + ]: 343546 : if (slotAddress->getType() != AI->getType()) {
2641 : : // If we're replacing an ArrayAlloca, the pointer element type may need to be fixed up
2642 : 193628 : auto BCI = new BitCastInst(slotAddress, AI->getType());
2643 : 193628 : BCI->insertAfter(slotAddress);
2644 : 193628 : slotAddress = BCI;
2645 : : }
2646 : 343546 : AI->replaceAllUsesWith(slotAddress);
2647 : 343546 : AI->eraseFromParent();
2648 : 343546 : AI = NULL;
2649 : 622577 : };
2650 [ + + ]: 428949 : for (AllocaInst *AI : S.Allocas) {
2651 : 149918 : auto ns = cast<ConstantInt>(AI->getArraySize())->getZExtValue();
2652 : 149918 : replace_alloca(AI);
2653 : 149918 : AllocaSlot += ns;
2654 : : }
2655 [ + + ]: 472659 : for (auto AI : S.ArrayAllocas) {
2656 : 193628 : replace_alloca(AI.first);
2657 : 193628 : AllocaSlot += AI.second;
2658 : : }
2659 [ + + ]: 375906 : for (auto Store : S.TrackedStores) {
2660 : 96875 : auto SI = Store.first;
2661 : 96875 : auto Base = SI->getValueOperand();
2662 : : //auto Tracked = TrackCompositeType(Base->getType());
2663 [ + + ]: 207898 : for (unsigned i = 0; i < Store.second; ++i) {
2664 : 222046 : auto slotAddress = CallInst::Create(
2665 : : getOrDeclare(jl_intrinsics::getGCFrameSlot),
2666 : 111023 : {gcframe, ConstantInt::get(T_int32, AllocaSlot - 2)});
2667 : 111023 : slotAddress->insertAfter(gcframe);
2668 [ + + ]: 111023 : auto ValExpr = std::make_pair(Base, isa<PointerType>(Base->getType()) ? -1 : i);
2669 : 111023 : auto Elem = MaybeExtractScalar(S, ValExpr, SI);
2670 [ + + ]: 111023 : if (Elem->getType() != T_prjlvalue)
2671 : 1142 : Elem = new BitCastInst(Elem, T_prjlvalue, "", SI);
2672 : : //auto Idxs = makeArrayRef(Tracked[i]);
2673 : : //Value *Elem = ExtractScalar(Base, true, Idxs, SI);
2674 : 111023 : Value *shadowStore = new StoreInst(Elem, slotAddress, SI);
2675 : : (void)shadowStore;
2676 : : // TODO: shadowStore->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
2677 : 111023 : AllocaSlot++;
2678 : : }
2679 : : }
2680 : 279031 : auto NRoots = ConstantInt::get(T_int32, MaxColor + 1 + AllocaSlot - 2);
2681 : 279031 : gcframe->setArgOperand(0, NRoots);
2682 : 279031 : pushGcframe->setArgOperand(1, NRoots);
2683 : :
2684 : : // Insert GC frame stores
2685 : 279031 : PlaceGCFrameStores(S, AllocaSlot - 2, Colors, gcframe);
2686 : : // Insert GCFrame pops
2687 [ + + ]: 5446840 : for(Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
2688 [ + + ]: 5167800 : if (isa<ReturnInst>(I->getTerminator())) {
2689 : 258680 : auto popGcframe = CallInst::Create(
2690 : : getOrDeclare(jl_intrinsics::popGCFrame),
2691 : : {gcframe});
2692 : 258680 : popGcframe->insertBefore(I->getTerminator());
2693 : : }
2694 : : }
2695 : : }
2696 : 685627 : }
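 : :
 : : // Putting the pieces together, a function that needs a frame ends up with a
 : : // prologue/epilogue of roughly this shape, where N = MaxColor + 1 +
 : : // (AllocaSlot - 2) and the first two frame words are reserved for metadata
 : : // (names and types are illustrative):
 : : //
 : : //   %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 N)
 : : //   call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 N)   ; after pgcstack
 : : //   ...
 : : //   %slot = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 i)
 : : //   store {} addrspace(10)* %root, {} addrspace(10)** %slot
 : : //   ...
 : : //   call void @julia.pop_gc_frame({} addrspace(10)** %gcframe)           ; before each ret
 : : //   ret ...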
2697 : :
2698 : 689604 : bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) {
2699 : 689604 : initAll(*F.getParent());
2700 : : LLVM_DEBUG(dbgs() << "GC ROOT PLACEMENT: Processing function " << F.getName() << "\n");
2701 [ + + ]: 689604 : if (!pgcstack_getter)
2702 : 3252 : return CleanupIR(F, nullptr, CFGModified);
2703 : :
2704 : 686352 : pgcstack = getPGCstack(F);
2705 [ + + ]: 686352 : if (!pgcstack)
2706 : 725 : return CleanupIR(F, nullptr, CFGModified);
2707 : :
2708 : 1371250 : State S = LocalScan(F);
2709 : 685627 : ComputeLiveness(S);
2710 : 1371250 : std::vector<int> Colors = ColorRoots(S);
2711 : 685627 : std::map<Value *, std::pair<int, int>> CallFrames; // = OptimizeCallFrames(S, Ordering);
2712 : 685627 : PlaceRootsAndUpdateCalls(Colors, S, CallFrames);
2713 : 685627 : CleanupIR(F, &S, CFGModified);
2714 : 685627 : return true;
2715 : : }
2716 : :
2717 : 689604 : bool LateLowerGCFrameLegacy::runOnFunction(Function &F) {
2718 : 95879 : auto GetDT = [this]() -> DominatorTree & {
2719 : 95879 : return getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2720 : 689604 : };
2721 : 689604 : auto lateLowerGCFrame = LateLowerGCFrame(GetDT);
2722 : 689604 : return lateLowerGCFrame.runOnFunction(F);
2723 : : }
2724 : :
2725 : 0 : PreservedAnalyses LateLowerGC::run(Function &F, FunctionAnalysisManager &AM)
2726 : : {
2727 : 0 : auto GetDT = [&AM, &F]() -> DominatorTree & {
2728 : 0 : return AM.getResult<DominatorTreeAnalysis>(F);
2729 : 0 : };
2730 : 0 : auto lateLowerGCFrame = LateLowerGCFrame(GetDT);
2731 : 0 : bool CFGModified = false;
2732 [ # # ]: 0 : if (lateLowerGCFrame.runOnFunction(F, &CFGModified)) {
2733 [ # # ]: 0 : if (CFGModified) {
2734 : 0 : return PreservedAnalyses::none();
2735 : : } else {
2736 : 0 : return PreservedAnalyses::allInSet<CFGAnalyses>();
2737 : : }
2738 : : }
2739 : 0 : return PreservedAnalyses::all();
2740 : : }
2741 : :
2742 : :
2743 : : char LateLowerGCFrameLegacy::ID = 0;
2744 : : static RegisterPass<LateLowerGCFrameLegacy> X("LateLowerGCFrame", "Late Lower GCFrame Pass", false, false);
2745 : :
2746 : 1052 : Pass *createLateLowerGCFramePass() {
2747 : 1052 : return new LateLowerGCFrameLegacy();
2748 : : }
2749 : :
2750 : 0 : extern "C" JL_DLLEXPORT void LLVMExtraAddLateLowerGCFramePass_impl(LLVMPassManagerRef PM)
2751 : : {
2752 : 0 : unwrap(PM)->add(createLateLowerGCFramePass());
2753 : 0 : }
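 : :
 : : // For reference, a client of the exported entry point above could register
 : : // and run the pass through the LLVM-C legacy pass manager API roughly like
 : : // this (an illustrative sketch; `mod` is an LLVMModuleRef owned by the caller):
 : : //
 : : //   LLVMPassManagerRef PM = LLVMCreatePassManager();
 : : //   LLVMExtraAddLateLowerGCFramePass_impl(PM);  // appends LateLowerGCFrameLegacy
 : : //   LLVMRunPassManager(PM, mod);                // runs it over every function in `mod`
 : : //   LLVMDisposePassManager(PM);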