Skip to content
  1. Aug 17, 2005
    • Chris Lattner's avatar
      Use a new helper to split critical edges, making the code simpler. · 2bf7cb52
      Chris Lattner authored
      Do not claim to not change the CFG.  We do change the cfg to split critical
      edges.  This isn't causing us a problem now, but could likely do so in the
      future.
      
      llvm-svn: 22824
      2bf7cb52
    • Chris Lattner's avatar
      add a helper · 79f8b626
      Chris Lattner authored
      llvm-svn: 22823
      79f8b626
    • Chris Lattner's avatar
      Fix a regression on X86, where FP values can be promoted too. · ba28c273
      Chris Lattner authored
      llvm-svn: 22822
      ba28c273
    • Misha Brukman's avatar
      Fix grammar · 5fbf58a7
      Misha Brukman authored
      llvm-svn: 22821
      5fbf58a7
    • Chris Lattner's avatar
      40f909ad
    • Chris Lattner's avatar
      Fix a few small typos I noticed when converting this over to the DAG->DAG · 63f774ec
      Chris Lattner authored
      selector.  Also, there is no difference between addSImm and addImm, so just
      use addImm, folding some branches.
      
      llvm-svn: 22819
      63f774ec
    • Jim Laskey's avatar
      Removed UINT_TO_FP and SINT_TO_FP from ISel outright. · 9828f26c
      Jim Laskey authored
      llvm-svn: 22818
      9828f26c
    • Andrew Lenharth's avatar
      thinko. Should fix s4addl.ll regression · 73370ba5
      Andrew Lenharth authored
      llvm-svn: 22817
      73370ba5
    • Jim Laskey's avatar
      Remove ISel code generation for UINT_TO_FP and SINT_TO_FP. Now asserts if · 5909c8b1
      Jim Laskey authored
      marked as legal.
      
      llvm-svn: 22816
      5909c8b1
    • Jim Laskey's avatar
      Make UINT_TO_FP and SINT_TO_FP use generic expansion. · 6267b2c9
      Jim Laskey authored
      llvm-svn: 22815
      6267b2c9
    • Jim Laskey's avatar
      · f2516a91
      Jim Laskey authored
      Added generic code expansion for [signed|unsigned] i32 to [f32|f64] casts in the
      legalizer.  PowerPC now uses this expansion instead of ISel version.
      
      Example:
      
      // signed integer to double conversion
      double f1(signed x) {
        return (double)x;
      }
      
      // unsigned integer to double conversion
      double f2(unsigned x) {
        return (double)x;
      }
      
      // signed integer to float conversion
      float f3(signed x) {
        return (float)x;
      }
      
      // unsigned integer to float conversion
      float f4(unsigned x) {
        return (float)x;
      }
      
      
      Byte Code:
      
      internal fastcc double %_Z2f1i(int %x) {
      entry:
              %tmp.1 = cast int %x to double          ; <double> [#uses=1]
              ret double %tmp.1
      }
      
      internal fastcc double %_Z2f2j(uint %x) {
      entry:
              %tmp.1 = cast uint %x to double         ; <double> [#uses=1]
              ret double %tmp.1
      }
      
      internal fastcc float %_Z2f3i(int %x) {
      entry:
              %tmp.1 = cast int %x to float           ; <float> [#uses=1]
              ret float %tmp.1
      }
      
      internal fastcc float %_Z2f4j(uint %x) {
      entry:
              %tmp.1 = cast uint %x to float          ; <float> [#uses=1]
              ret float %tmp.1
      }
      
      internal fastcc double %_Z2g1i(int %x) {
      entry:
              %buffer = alloca [2 x uint]             ; <[2 x uint]*> [#uses=3]
              %tmp.0 = getelementptr [2 x uint]* %buffer, int 0, int 0                ; <uint*> [#uses=1]
              store uint 1127219200, uint* %tmp.0
              %tmp.2 = cast int %x to uint            ; <uint> [#uses=1]
              %tmp.3 = xor uint %tmp.2, 2147483648            ; <uint> [#uses=1]
              %tmp.5 = getelementptr [2 x uint]* %buffer, int 0, int 1                ; <uint*> [#uses=1]
              store uint %tmp.3, uint* %tmp.5
              %tmp.9 = cast [2 x uint]* %buffer to double*            ; <double*> [#uses=1]
              %tmp.10 = load double* %tmp.9           ; <double> [#uses=1]
              %tmp.13 = load double* cast (long* %signed_bias to double*)             ; <double> [#uses=1]
              %tmp.14 = sub double %tmp.10, %tmp.13           ; <double> [#uses=1]
              ret double %tmp.14
      }
      
      internal fastcc double %_Z2g2j(uint %x) {
      entry:
              %buffer = alloca [2 x uint]             ; <[2 x uint]*> [#uses=3]
              %tmp.0 = getelementptr [2 x uint]* %buffer, int 0, int 0                ; <uint*> [#uses=1]
              store uint 1127219200, uint* %tmp.0
              %tmp.1 = getelementptr [2 x uint]* %buffer, int 0, int 1                ; <uint*> [#uses=1]
              store uint %x, uint* %tmp.1
              %tmp.4 = cast [2 x uint]* %buffer to double*            ; <double*> [#uses=1]
              %tmp.5 = load double* %tmp.4            ; <double> [#uses=1]
              %tmp.8 = load double* cast (long* %unsigned_bias to double*)            ; <double> [#uses=1]
              %tmp.9 = sub double %tmp.5, %tmp.8              ; <double> [#uses=1]
              ret double %tmp.9
      }
      
      internal fastcc float %_Z2g3i(int %x) {
      entry:
              %buffer = alloca [2 x uint]             ; <[2 x uint]*> [#uses=3]
              %tmp.0 = getelementptr [2 x uint]* %buffer, int 0, int 0                ; <uint*> [#uses=1]
              store uint 1127219200, uint* %tmp.0
              %tmp.2 = cast int %x to uint            ; <uint> [#uses=1]
              %tmp.3 = xor uint %tmp.2, 2147483648            ; <uint> [#uses=1]
              %tmp.5 = getelementptr [2 x uint]* %buffer, int 0, int 1                ; <uint*> [#uses=1]
              store uint %tmp.3, uint* %tmp.5
              %tmp.9 = cast [2 x uint]* %buffer to double*            ; <double*> [#uses=1]
              %tmp.10 = load double* %tmp.9           ; <double> [#uses=1]
              %tmp.13 = load double* cast (long* %signed_bias to double*)             ; <double> [#uses=1]
              %tmp.14 = sub double %tmp.10, %tmp.13           ; <double> [#uses=1]
              %tmp.16 = cast double %tmp.14 to float          ; <float> [#uses=1]
              ret float %tmp.16
      }
      
      internal fastcc float %_Z2g4j(uint %x) {
      entry:
              %buffer = alloca [2 x uint]             ; <[2 x uint]*> [#uses=3]
              %tmp.0 = getelementptr [2 x uint]* %buffer, int 0, int 0                ; <uint*> [#uses=1]
              store uint 1127219200, uint* %tmp.0
              %tmp.1 = getelementptr [2 x uint]* %buffer, int 0, int 1                ; <uint*> [#uses=1]
              store uint %x, uint* %tmp.1
              %tmp.4 = cast [2 x uint]* %buffer to double*            ; <double*> [#uses=1]
              %tmp.5 = load double* %tmp.4            ; <double> [#uses=1]
              %tmp.8 = load double* cast (long* %unsigned_bias to double*)            ; <double> [#uses=1]
              %tmp.9 = sub double %tmp.5, %tmp.8              ; <double> [#uses=1]
              %tmp.11 = cast double %tmp.9 to float           ; <float> [#uses=1]
              ret float %tmp.11
      }
      
      
      PowerPC Code:
      
              .machine ppc970
      
      
              .const
              .align  2
      .CPIl1__Z2f1i_0:                                        ; float 0x4330000080000000
              .long   1501560836      ; float 4.5036e+15
              .text
              .align  2
              .globl  l1__Z2f1i
      l1__Z2f1i:
      .LBBl1__Z2f1i_0:        ; entry
              xoris r2, r3, 32768
              stw r2, -4(r1)
              lis r2, 17200
              stw r2, -8(r1)
              lfd f0, -8(r1)
              lis r2, ha16(.CPIl1__Z2f1i_0)
              lfs f1, lo16(.CPIl1__Z2f1i_0)(r2)
              fsub f1, f0, f1
              blr
      
      
              .const
              .align  2
      .CPIl2__Z2f2j_0:                                        ; float 0x4330000000000000
              .long   1501560832      ; float 4.5036e+15
              .text
              .align  2
              .globl  l2__Z2f2j
      l2__Z2f2j:
      .LBBl2__Z2f2j_0:        ; entry
              stw r3, -4(r1)
              lis r2, 17200
              stw r2, -8(r1)
              lfd f0, -8(r1)
              lis r2, ha16(.CPIl2__Z2f2j_0)
              lfs f1, lo16(.CPIl2__Z2f2j_0)(r2)
              fsub f1, f0, f1
              blr
      
      
              .const
              .align  2
      .CPIl3__Z2f3i_0:                                        ; float 0x4330000080000000
              .long   1501560836      ; float 4.5036e+15
              .text
              .align  2
              .globl  l3__Z2f3i
      l3__Z2f3i:
      .LBBl3__Z2f3i_0:        ; entry
              xoris r2, r3, 32768
              stw r2, -4(r1)
              lis r2, 17200
              stw r2, -8(r1)
              lfd f0, -8(r1)
              lis r2, ha16(.CPIl3__Z2f3i_0)
              lfs f1, lo16(.CPIl3__Z2f3i_0)(r2)
              fsub f0, f0, f1
              frsp f1, f0
              blr
      
      
              .const
              .align  2
      .CPIl4__Z2f4j_0:                                        ; float 0x4330000000000000
              .long   1501560832      ; float 4.5036e+15
              .text
              .align  2
              .globl  l4__Z2f4j
      l4__Z2f4j:
      .LBBl4__Z2f4j_0:        ; entry
              stw r3, -4(r1)
              lis r2, 17200
              stw r2, -8(r1)
              lfd f0, -8(r1)
              lis r2, ha16(.CPIl4__Z2f4j_0)
              lfs f1, lo16(.CPIl4__Z2f4j_0)(r2)
              fsub f0, f0, f1
              frsp f1, f0
              blr
      
      llvm-svn: 22814
      f2516a91
    • Chris Lattner's avatar
      add a new TargetConstant node · 0d2456e1
      Chris Lattner authored
      llvm-svn: 22813
      0d2456e1
    • Chris Lattner's avatar
      add some helper methods, and a new TargetConstant node, which is not · 12214cf5
      Chris Lattner authored
      subjected to folding.
      
      llvm-svn: 22812
      12214cf5
    • Nate Begeman's avatar
      Implement a couple improvements: · 784c8068
      Nate Begeman authored
      Remove dead code in ISD::Constant handling
      Add support for add long, imm16
      
      We now codegen 'long long foo(long long a) { return ++a; }'
      as:
      addic r4, r4, 1
      addze r3, r3
      blr
      
      instead of:
      li r2, 1
      li r5, 0
      addc r2, r4, r2
      adde r3, r3, r5
      blr
      
      llvm-svn: 22811
      784c8068
    • Chris Lattner's avatar
      remove a dead enum value, making a comment above correct again · 2111aae8
      Chris Lattner authored
      llvm-svn: 22810
      2111aae8
  2. Aug 16, 2005
  3. Aug 15, 2005
  4. Aug 14, 2005
    • Nate Begeman's avatar
      Fix last night's PPC32 regressions by · d5e739dc
      Nate Begeman authored
      1. Not selecting the false value of a select_cc in the false arm, which
         isn't legal for nested selects.
      2. Actually returning the node we created and Legalized in the FP_TO_UINT
         Expander.
      
      llvm-svn: 22789
      d5e739dc
    • Nate Begeman's avatar
      Fix last night's X86 regressions by putting code for SSE in the if(SSE) · e5394d45
      Nate Begeman authored
      block.  nur.
      
      llvm-svn: 22788
      e5394d45
    • Andrew Lenharth's avatar
      only build .a on alpha · ed072338
      Andrew Lenharth authored
      llvm-svn: 22787
      ed072338
    • Nate Begeman's avatar
      Fix FP_TO_UINT with Scalar SSE2 now that the legalizer can handle it. We · 4d959f66
      Nate Begeman authored
      now generate the relatively good code sequences:
      unsigned short foo(float a) { return a; }
      _foo:
              movss 4(%esp), %xmm0
              cvttss2si %xmm0, %eax
              movzwl %ax, %eax
              ret
      
      and
      unsigned bar(float a) { return a; }
      _bar:
              movss .CPI_bar_0, %xmm0
              movss 4(%esp), %xmm1
              movapd %xmm1, %xmm2
              subss %xmm0, %xmm2
              cvttss2si %xmm2, %eax
              xorl $-2147483648, %eax
              cvttss2si %xmm1, %ecx
              ucomiss %xmm0, %xmm1
              cmovb %ecx, %eax
              ret
      
      llvm-svn: 22786
      4d959f66
    • Nate Begeman's avatar
      Teach the legalizer how to legalize FP_TO_UINT. · 36853ee1
      Nate Begeman authored
      Teach the legalizer to promote FP_TO_UINT to FP_TO_SINT if the wider
        FP_TO_UINT is also illegal.  This allows us on PPC to codegen
        unsigned short foo(float a) { return a; }
      
      as:
      _foo:
      .LBB_foo_0:     ; entry
              fctiwz f0, f1
              stfd f0, -8(r1)
              lwz r2, -4(r1)
              rlwinm r3, r2, 0, 16, 31
              blr
      
      instead of:
      _foo:
      .LBB_foo_0:     ; entry
              fctiwz f0, f1
              stfd f0, -8(r1)
              lwz r2, -4(r1)
              lis r3, ha16(.CPI_foo_0)
              lfs f0, lo16(.CPI_foo_0)(r3)
              fcmpu cr0, f1, f0
              blt .LBB_foo_2  ; entry
      .LBB_foo_1:     ; entry
              fsubs f0, f1, f0
              fctiwz f0, f0
              stfd f0, -16(r1)
              lwz r2, -12(r1)
              xoris r2, r2, 32768
      .LBB_foo_2:     ; entry
              rlwinm r3, r2, 0, 16, 31
              blr
      
      llvm-svn: 22785
      36853ee1
    • Nate Begeman's avatar
      Make FP_TO_UINT Illegal. This allows us to generate significantly better · 83f6b98c
      Nate Begeman authored
      codegen for FP_TO_UINT by using the legalizer's SELECT variant.
      
      Implement a codegen improvement for SELECT_CC, selecting the false node in
      the MBB that feeds the phi node.  This allows us to codegen:
      void foo(int *a, int b, int c) { int d = (a < b) ? 5 : 9; *a = d; }
      as:
      _foo:
              li r2, 5
              cmpw cr0, r4, r3
              bgt .LBB_foo_2  ; entry
      .LBB_foo_1:     ; entry
              li r2, 9
      .LBB_foo_2:     ; entry
              stw r2, 0(r3)
              blr
      
       instead of:
      _foo:
              li r2, 5
              li r5, 9
              cmpw cr0, r4, r3
              bgt .LBB_foo_2  ; entry
      .LBB_foo_1:     ; entry
              or r2, r5, r5
      .LBB_foo_2:     ; entry
              stw r2, 0(r3)
              blr
      
      llvm-svn: 22784
      83f6b98c
Loading