From 309327a4b5ac9d8fbb0cf0fd06f7f50d7b4c285f Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Wed, 25 Feb 2004 07:00:55 +0000 Subject: [PATCH] Teach the instruction selector how to transform 'array' GEP computations into X86 scaled indexes. This allows us to compile GEP's like this: int* %test([10 x { int, { int } }]* %X, int %Idx) { %Idx = cast int %Idx to long %X = getelementptr [10 x { int, { int } }]* %X, long 0, long %Idx, ubyte 1, ubyte 0 ret int* %X } Into a single address computation: test: mov %EAX, DWORD PTR [%ESP + 4] mov %ECX, DWORD PTR [%ESP + 8] lea %EAX, DWORD PTR [%EAX + 8*%ECX + 4] ret Before it generated: test: mov %EAX, DWORD PTR [%ESP + 4] mov %ECX, DWORD PTR [%ESP + 8] shl %ECX, 3 add %EAX, %ECX lea %EAX, DWORD PTR [%EAX + 4] ret This is useful for things like int/float/double arrays, as the indexing can be folded into the loads&stores, reducing register pressure and decreasing the pressure on the decode unit. With these changes, I expect our performance on 256.bzip2 and gzip to improve a lot. On bzip2 for example, we go from this: 10665 asm-printer - Number of machine instrs printed 40 ra-local - Number of loads/stores folded into instructions 1708 ra-local - Number of loads added 1532 ra-local - Number of stores added 1354 twoaddressinstruction - Number of instructions added 1354 twoaddressinstruction - Number of two-address instructions 2794 x86-peephole - Number of peephole optimization performed to this: 9873 asm-printer - Number of machine instrs printed 41 ra-local - Number of loads/stores folded into instructions 1710 ra-local - Number of loads added 1521 ra-local - Number of stores added 789 twoaddressinstruction - Number of instructions added 789 twoaddressinstruction - Number of two-address instructions 2142 x86-peephole - Number of peephole optimization performed ... and these types of instructions are often in tight loops. Linear scan is also helped, but not as much. It goes from: 8787 asm-printer - Number of machine instrs printed 2389 liveintervals - Number of identity moves eliminated after coalescing 2288 liveintervals - Number of interval joins performed 3522 liveintervals - Number of intervals after coalescing 5810 liveintervals - Number of original intervals 700 spiller - Number of loads added 487 spiller - Number of stores added 303 spiller - Number of register spills 1354 twoaddressinstruction - Number of instructions added 1354 twoaddressinstruction - Number of two-address instructions 363 x86-peephole - Number of peephole optimization performed to: 7982 asm-printer - Number of machine instrs printed 1759 liveintervals - Number of identity moves eliminated after coalescing 1658 liveintervals - Number of interval joins performed 3282 liveintervals - Number of intervals after coalescing 4940 liveintervals - Number of original intervals 635 spiller - Number of loads added 452 spiller - Number of stores added 288 spiller - Number of register spills 789 twoaddressinstruction - Number of instructions added 789 twoaddressinstruction - Number of two-address instructions 258 x86-peephole - Number of peephole optimization performed Though I'm not complaining about the drop in the number of intervals. :) llvm-svn: 11820 --- llvm/lib/Target/X86/InstSelectSimple.cpp | 47 ++++++++++++------------ 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/X86/InstSelectSimple.cpp b/llvm/lib/Target/X86/InstSelectSimple.cpp index d6e3a75186f0..d109d061fce0 100644 --- a/llvm/lib/Target/X86/InstSelectSimple.cpp +++ b/llvm/lib/Target/X86/InstSelectSimple.cpp @@ -2438,11 +2438,30 @@ void ISel::getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP, assert(idx->getType() == Type::LongTy && "Bad GEP array index!"); // If idx is a constant, fold it into the offset. + unsigned TypeSize = TD.getTypeSize(SqTy->getElementType()); if (ConstantSInt *CSI = dyn_cast(idx)) { - Disp += TD.getTypeSize(SqTy->getElementType())*CSI->getValue(); + Disp += TypeSize*CSI->getValue(); } else { - // If we can't handle it, return. - return; + // If the index reg is already taken, we can't handle this index. + if (IndexReg) return; + + // If this is a size that we can handle, then add the index as + switch (TypeSize) { + case 1: case 2: case 4: case 8: + // These are all acceptable scales on X86. + Scale = TypeSize; + break; + default: + // Otherwise, we can't handle this scale + return; + } + + if (CastInst *CI = dyn_cast(idx)) + if (CI->getOperand(0)->getType() == Type::IntTy || + CI->getOperand(0)->getType() == Type::UIntTy) + idx = CI->getOperand(0); + + IndexReg = MBB ? getReg(idx, MBB, IP) : 1; } GEPOps.pop_back(); // Consume a GEP operand @@ -2456,7 +2475,7 @@ void ISel::getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP, // FIXME: When addressing modes are more powerful/correct, we could load // global addresses directly as 32-bit immediates. assert(BaseReg == 0); - BaseReg = MBB ? getReg(GEPOps[0], MBB, IP) : 0; + BaseReg = MBB ? getReg(GEPOps[0], MBB, IP) : 1; GEPOps.pop_back(); // Consume the last GEP operand } @@ -2538,26 +2557,6 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB, } break; // we are now done - } else if (const StructType *StTy = dyn_cast(GEPTypes.back())) { - // It's a struct access. CUI is the index into the structure, - // which names the field. This index must have unsigned type. - const ConstantUInt *CUI = cast(GEPOps.back()); - GEPOps.pop_back(); // Consume a GEP operand - GEPTypes.pop_back(); - - // Use the TargetData structure to pick out what the layout of the - // structure is in memory. Since the structure index must be constant, we - // can get its value and use it to find the right byte offset from the - // StructLayout class's list of structure member offsets. - unsigned idxValue = CUI->getValue(); - unsigned FieldOff = TD.getStructLayout(StTy)->MemberOffsets[idxValue]; - if (FieldOff) { - unsigned Reg = makeAnotherReg(Type::UIntTy); - // Emit an ADD to add FieldOff to the basePtr. - BMI(MBB, IP, X86::ADDri32, 2, TargetReg).addReg(Reg).addZImm(FieldOff); - --IP; // Insert the next instruction before this one. - TargetReg = Reg; // Codegen the rest of the GEP into this - } } else { // It's an array or pointer access: [ArraySize x ElementType]. const SequentialType *SqTy = cast(GEPTypes.back()); -- GitLab