Pasted on September 28 2012 04:43:25
Never expires
By anonymous

  1.  
  2. // saving one 64-bit unsigned integer with little endian byte order
  3. // how hard can that be? we don't need any compiler-specific intrinsics or libraries, right?
  4.  
  /*
   * Store a 64-bit unsigned integer into out[0..7] in little-endian byte
   * order (least significant byte first). The result does not depend on the
   * byte order of the host, because bytes are extracted by shifting.
   *
   * in  - value to serialize (passed by value; the local copy is consumed)
   * out - destination buffer; the function writes exactly 8 bytes to it
   */
  void store(uint64_t in, uint8_t *out) {
      for (size_t i = 0; i < 8; i++) {
          /* C-style cast: valid in both C and C++, whereas the function-style
           * cast uint8_t(in) compiles only as C++. Truncation keeps the low
           * 8 bits. */
          out[i] = (uint8_t)in;
          in >>= 8; /* move the next more-significant byte into the low 8 bits */
          // or out[i] = (uint8_t)(in >> (i * 8)); without the shift-assign - makes no difference
      }
  }
  12.  
  13. // compiled for amd64:
  14.  
  15. gcc 4.7.1 -O3:
  16. movq %rdi, %rax # copy input into RAX
  17. movb %dil, (%rsi) # and save the first input byte
  18. shrq $8, %rax # shift RAX 8 bits (so we can use AL instead of AH - because we can)
  19. movb %al, 1(%rsi) # and save the next input byte
  20. # for the next byte let's just shift RAX 8 more bits - or not
  21. movq %rdi, %rax # lolwut, why not copy the input again
  22. shrq $16, %rax # so we can use up two instructions instead of one
  23. ... # continue this madness for the remaining bytes
  24. # total instruction count: 21
  25.  
  26. clang 3.1 -O3:
  27. movb %dil, (%rsi) # and save the first input byte
  28. movq %rdi, %rax # copy input into RAX
  29. movb %ah, 1(%rsi) # and save the next input byte
  30. movq %rdi, %rax # copy input into RAX again, just for the lulz
  31. shrq $16, %rax # well, at least it's one instruction less than gcc
  32. ... # continue this madness for the remaining bytes
  33. # total instruction count: 20
  34.  
  35. # now, would one (possibly unaligned) 64-bit store have been that bad?
  36. movq %rdi, (%rsi)
  37.