Skip to content
MachOWriter.cpp 14.5 KiB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428
//===-- MachOWriter.cpp - Target-independent Mach-O Writer code -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Nate Begeman and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the target-independent Mach-O writer.  This file writes
// out the Mach-O file in the following order:
//
//  #1 FatHeader (universal-only)
//  #2 FatArch (universal-only, 1 per universal arch)
//  Per arch:
//    #3 Header
//    #4 Load Commands
//    #5 Sections
//    #6 Relocations
//    #7 Symbols
//    #8 Strings
//
//===----------------------------------------------------------------------===//

#include "llvm/Module.h"
#include "llvm/CodeGen/MachineCodeEmitter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineRelocation.h"
#include "llvm/CodeGen/MachOWriter.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetJITInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Mangler.h"
#include <iostream>
using namespace llvm;

//===----------------------------------------------------------------------===//
//                       MachOCodeEmitter Implementation
//===----------------------------------------------------------------------===//

namespace llvm {
  /// MachOCodeEmitter - This class is used by the MachOWriter to emit the code 
  /// for functions to the Mach-O file.
  class MachOCodeEmitter : public MachineCodeEmitter {
    MachOWriter &MOW;
    
    /// MOS - The current section we're writing to
    MachOWriter::MachOSection *MOS;

    /// Relocations - These are the relocations that the function needs, as
    /// emitted.
    std::vector<MachineRelocation> Relocations;

    /// MBBLocations - This vector is a mapping from MBB ID's to their address.
    /// It is filled in by the StartMachineBasicBlock callback and queried by
    /// the getMachineBasicBlockAddress callback.
    std::vector<intptr_t> MBBLocations;
    
  public:
    MachOCodeEmitter(MachOWriter &mow) : MOW(mow) {}

    void startFunction(MachineFunction &F);
    bool finishFunction(MachineFunction &F);

    void addRelocation(const MachineRelocation &MR) {
      Relocations.push_back(MR);
    }
    
    virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
      if (MBBLocations.size() <= (unsigned)MBB->getNumber())
        MBBLocations.resize((MBB->getNumber()+1)*2);
      MBBLocations[MBB->getNumber()] = getCurrentPCValue();
    }

    virtual intptr_t getConstantPoolEntryAddress(unsigned Index) const {
      assert(0 && "CP not implementated yet!");
      return 0;
    }
    virtual intptr_t getJumpTableEntryAddress(unsigned Index) const {
      assert(0 && "JT not implementated yet!");
      return 0;
    }

    virtual intptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
      assert(MBBLocations.size() > (unsigned)MBB->getNumber() && 
             MBBLocations[MBB->getNumber()] && "MBB not emitted!");
      return MBBLocations[MBB->getNumber()];
    }

    /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE!
    void startFunctionStub(unsigned StubSize) {
      assert(0 && "JIT specific function called!");
      abort();
    }
    void *finishFunctionStub(const Function *F) {
      assert(0 && "JIT specific function called!");
      abort();
      return 0;
    }
  };
}

/// startFunction - This callback is invoked when a new machine function is
/// about to be emitted.
void MachOCodeEmitter::startFunction(MachineFunction &F) {
  // Align the output buffer to the appropriate alignment, power of 2.
  // FIXME: GENERICIZE!!
  unsigned Align = 4;

  // Get the Mach-O Section that this function belongs in.
  MOS = &MOW.getTextSection();
  
   // FIXME: better memory management
  MOS->SectionData.reserve(4096);
  BufferBegin = &(MOS->SectionData[0]);
  BufferEnd = BufferBegin + MOS->SectionData.capacity();
  CurBufferPtr = BufferBegin + MOS->size;

  // Upgrade the section alignment if required.
  if (MOS->align < Align) MOS->align = Align;

  // Make sure we only relocate to this function's MBBs.
  MBBLocations.clear();
}

/// finishFunction - This callback is invoked after the function is completely
/// finished.
bool MachOCodeEmitter::finishFunction(MachineFunction &F) {
  MOS->size += CurBufferPtr - BufferBegin;
  
  // Get a symbol for the function to add to the symbol table
  MachOWriter::MachOSym FnSym(F.getFunction(), MOS->Index);
  
  // Figure out the binding (linkage) of the symbol.
  switch (F.getFunction()->getLinkage()) {
  default:
    // appending linkage is illegal for functions.
    assert(0 && "Unknown linkage type!");
  case GlobalValue::ExternalLinkage:
    FnSym.n_type = MachOWriter::MachOSym::N_SECT | MachOWriter::MachOSym::N_EXT;
    break;
  case GlobalValue::InternalLinkage:
    FnSym.n_type = MachOWriter::MachOSym::N_SECT;
    break;
  }
  
  // Resolve the function's relocations either to concrete pointers in the case
  // of branches from one block to another, or to target relocation entries.
  for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
    MachineRelocation &MR = Relocations[i];
    if (MR.isBasicBlock()) {
      void *MBBAddr = (void *)getMachineBasicBlockAddress(MR.getBasicBlock());
      MR.setResultPointer(MBBAddr);
      MOW.TM.getJITInfo()->relocate(BufferBegin, &MR, 1, 0);
      // FIXME: we basically want the JITInfo relocate() function to rewrite
      //        this guy right now, so we just write the correct displacement
      //        to the file.
    } else {
      // isString | isGV | isCPI | isJTI
      // FIXME: do something smart here.  We won't be able to relocate these
      //        until the sections are all layed out, but we still need to
      //        record them.  Maybe emit TargetRelocations and then resolve
      //        those at file writing time?
      std::cerr << "whee!\n";
    }
  }
  Relocations.clear();
  
  // Finally, add it to the symtab.
  MOW.SymbolTable.push_back(FnSym);
  return false;
}

//===----------------------------------------------------------------------===//
//                          MachOWriter Implementation
//===----------------------------------------------------------------------===//

MachOWriter::MachOWriter(std::ostream &o, TargetMachine &tm) : O(o), TM(tm) {
  // FIXME: set cpu type and cpu subtype somehow from TM
  is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
  isLittleEndian = TM.getTargetData()->isLittleEndian();

  // Create the machine code emitter object for this target.
  MCE = new MachOCodeEmitter(*this);
}

MachOWriter::~MachOWriter() {
  delete MCE;
}

void MachOWriter::EmitGlobal(GlobalVariable *GV) {
  // FIXME: do something smart here.
}


bool MachOWriter::runOnMachineFunction(MachineFunction &MF) {
  // Nothing to do here, this is all done through the MCE object.
  return false;
}

bool MachOWriter::doInitialization(Module &M) {
  // Set the magic value, now that we know the pointer size and endianness
  Header.setMagic(isLittleEndian, is64Bit);

  // Set the file type
  // FIXME: this only works for object files, we do not support the creation
  //        of dynamic libraries or executables at this time.
  Header.filetype = MachOHeader::MH_OBJECT;

  Mang = new Mangler(M);
  return false;
}

/// doFinalization - Now that the module has been completely processed, emit
/// the Mach-O file to 'O'.
bool MachOWriter::doFinalization(Module &M) {
  // Okay, the.text section has been completed, build the .data, .bss, and 
  // "common" sections next.
  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
       I != E; ++I)
    EmitGlobal(I);
  
  // Emit the header and load commands.
  EmitHeaderAndLoadCommands();

  // Emit the text and data sections.
  EmitSections();

  // Emit the relocation entry data for each section.
  // FIXME: presumably this should be a virtual method, since different targets
  //        have different relocation types.
  EmitRelocations();

  // Emit the symbol table.
  // FIXME: we don't handle debug info yet, we should probably do that.
  EmitSymbolTable();

  // Emit the string table for the sections we have.
  EmitStringTable();

  // We are done with the abstract symbols.
  SectionList.clear();
  SymbolTable.clear();
  DynamicSymbolTable.clear();

  // Release the name mangler object.
  delete Mang; Mang = 0;
  return false;
}

void MachOWriter::EmitHeaderAndLoadCommands() {
  // Step #0: Fill in the segment load command size, since we need it to figure
  //          out the rest of the header fields
  MachOSegment SEG("", is64Bit);
  SEG.nsects  = SectionList.size();
  SEG.cmdsize = SEG.cmdSize(is64Bit) + 
                SEG.nsects * SectionList.begin()->cmdSize(is64Bit);
  
  // Step #1: calculate the number of load commands.  We always have at least
  //          one, for the LC_SEGMENT load command, plus two for the normal
  //          and dynamic symbol tables, if there are any symbols.
  Header.ncmds = SymbolTable.empty() ? 1 : 3;
  
  // Step #2: calculate the size of the load commands
  Header.sizeofcmds = SEG.cmdsize;
  if (!SymbolTable.empty())
    Header.sizeofcmds += SymTab.cmdsize + DySymTab.cmdsize;
    
  // Step #3: write the header to the file
  // Local alias to shortenify coming code.
  DataBuffer &FH = Header.HeaderData;
  outword(FH, Header.magic);
  outword(FH, Header.cputype);
  outword(FH, Header.cpusubtype);
  outword(FH, Header.filetype);
  outword(FH, Header.ncmds);
  outword(FH, Header.sizeofcmds);
  outword(FH, Header.flags);
  if (is64Bit)
    outword(FH, Header.reserved);
  
  // Step #4: Finish filling in the segment load command and write it out
  for (std::list<MachOSection>::iterator I = SectionList.begin(),
         E = SectionList.end(); I != E; ++I)
    SEG.filesize += I->size;
  SEG.vmsize = SEG.filesize;
  SEG.fileoff = Header.cmdSize(is64Bit) + Header.sizeofcmds;
  
  outword(FH, SEG.cmd);
  outword(FH, SEG.cmdsize);
  outstring(FH, SEG.segname, 16);
  outaddr(FH, SEG.vmaddr);
  outaddr(FH, SEG.vmsize);
  outaddr(FH, SEG.fileoff);
  outaddr(FH, SEG.filesize);
  outword(FH, SEG.maxprot);
  outword(FH, SEG.initprot);
  outword(FH, SEG.nsects);
  outword(FH, SEG.flags);
  
  // Step #5: Write out the section commands for each section
  for (std::list<MachOSection>::iterator I = SectionList.begin(),
         E = SectionList.end(); I != E; ++I) {
    I->offset = SEG.fileoff;  // FIXME: separate offset
    outstring(FH, I->sectname, 16);
    outstring(FH, I->segname, 16);
    outaddr(FH, I->addr);
    outaddr(FH, I->size);
    outword(FH, I->offset);
    outword(FH, I->align);
    outword(FH, I->reloff);
    outword(FH, I->nreloc);
    outword(FH, I->flags);
    outword(FH, I->reserved1);
    outword(FH, I->reserved2);
    if (is64Bit)
      outword(FH, I->reserved3);
  }
  
  // Step #6: Emit LC_SYMTAB/LC_DYSYMTAB load commands
  // FIXME: We'll need to scan over the symbol table and possibly do the sort
  // here so that we can set the proper indices in the dysymtab load command for
  // the index and number of external symbols defined in this module.
  // FIXME: We'll also need to scan over all the symbols so that we can 
  // calculate the size of the string table.
  // FIXME: add size of relocs
  SymTab.symoff  = SEG.fileoff + SEG.filesize;
  SymTab.nsyms   = SymbolTable.size();
  SymTab.stroff  = SymTab.symoff + SymTab.nsyms * MachOSym::entrySize();
  SymTab.strsize = 10;
  outword(FH, SymTab.cmd);
  outword(FH, SymTab.cmdsize);
  outword(FH, SymTab.symoff);
  outword(FH, SymTab.nsyms);
  outword(FH, SymTab.stroff);
  outword(FH, SymTab.strsize);

  // FIXME: set DySymTab fields appropriately
  outword(FH, DySymTab.cmd);
  outword(FH, DySymTab.cmdsize);
  outword(FH, DySymTab.ilocalsym);
  outword(FH, DySymTab.nlocalsym);
  outword(FH, DySymTab.iextdefsym);
  outword(FH, DySymTab.nextdefsym);
  outword(FH, DySymTab.iundefsym);
  outword(FH, DySymTab.nundefsym);
  outword(FH, DySymTab.tocoff);
  outword(FH, DySymTab.ntoc);
  outword(FH, DySymTab.modtaboff);
  outword(FH, DySymTab.nmodtab);
  outword(FH, DySymTab.extrefsymoff);
  outword(FH, DySymTab.nextrefsyms);
  outword(FH, DySymTab.indirectsymoff);
  outword(FH, DySymTab.nindirectsyms);
  outword(FH, DySymTab.extreloff);
  outword(FH, DySymTab.nextrel);
  outword(FH, DySymTab.locreloff);
  outword(FH, DySymTab.nlocrel);
  
  O.write((char*)&FH[0], FH.size());
}

/// EmitSections - Now that we have constructed the file header and load
/// commands, emit the data for each section to the file.
void MachOWriter::EmitSections() {
  for (std::list<MachOSection>::iterator I = SectionList.begin(),
         E = SectionList.end(); I != E; ++I) {
    O.write((char*)&I->SectionData[0], I->size);
  }
}

void MachOWriter::EmitRelocations() {
  // FIXME: this should probably be a pure virtual function, since the
  // relocation types and layout of the relocations themselves are target
  // specific.
}

/// EmitSymbolTable - Sort the symbols we encountered and assign them each a 
/// string table index so that they appear in the correct order in the output 
/// file.
void MachOWriter::EmitSymbolTable() {
  // The order of the symbol table is:
  // local symbols
  // defined external symbols (sorted by name)
  // undefined external symbols (sorted by name)
  DataBuffer ST;
  
  // FIXME: enforce the above ordering, presumably by sorting by name, 
  // then partitioning twice.
  unsigned stringIndex;
  for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
         E = SymbolTable.end(); I != E; ++I) {
    // FIXME: remove when we actually calculate these correctly
    I->n_strx = 1;
    StringTable.push_back(Mang->getValueName(I->GV));
    // Emit nlist to buffer
    outword(ST, I->n_strx);
    outbyte(ST, I->n_type);
    outbyte(ST, I->n_sect);
    outhalf(ST, I->n_desc);
    outaddr(ST, I->n_value);
  }
  
  O.write((char*)&ST[0], ST.size());
}

/// EmitStringTable - This method adds and emits a section for the Mach-O 
/// string table.
void MachOWriter::EmitStringTable() {
  // The order of the string table is:
  // strings for external symbols
  // strings for local symbols
  // This is the symbol table, but backwards.  This allows us to avoid a sorting
  // the symbol table again; all we have to do is use a reverse iterator.
  DataBuffer ST;

  // Write out a leading zero byte when emitting string table, for n_strx == 0
  // which means an empty string.
  outbyte(ST, 0);

  for (std::vector<std::string>::iterator I = StringTable.begin(),
         E = StringTable.end(); I != E; ++I) {
    // FIXME: do not arbitrarily cap symbols to 16 characters
    // FIXME: do something more efficient than outstring
    outstring(ST, *I, 16);
  }
  O.write((char*)&ST[0], ST.size());
}