比赛题目很简单:构造一个程序,在 stdout 上打印出自身的 MD5,程序越短越好。按最终程序文件大小字节数排名,文件越小,排名越靠前。 只能使用 ld-linux-x86-64.so, libc.so, libdl.so, libgcc_s.so, libm.so, libstdc++.so 。 禁止了 socket, shmget, fork, execvc 等 syscall 。
汇编高手如云,本人只做到 752 字节,只拿到 27 名。 但忙活好几天,学到不少东西,也有苦劳,还是值得记录一下。
基本是纯 C 实现,没有动用汇编。
基于这个 MD5 实现改造 https://gist.github.com/creationix/4710780
https://www.nayuki.io/page/fast-md5-hash-implementation-in-x86-assembly
参照 musl 和 rt0 ,去除对 glibc 依赖
后续借鉴思路,有优化
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 | #define SYS_write 1 #define SYS_exit 60 inline long syscall3(long n, long a0, long a1, long a2) { long ret; __asm__ volatile("syscall" : "=a"(ret) : "a"(n), "D"(a0), "S"(a1), "d"(a2) : "rcx", "r11", "memory"); return (ret); } inline long syscall1(long n, long a0) { long ret; __asm__ volatile("syscall" : "=a"(ret) : "a"(n), "D"(a0) : "rcx", "r11", "memory"); return (ret); } static int write(int f, const char* d, int l) { int ret = syscall3(SYS_write, f, (long)(d), l); return (ret); } void _exit(int r) { syscall1(SYS_exit, r); } typedef unsigned uint32_t; typedef unsigned char uint8_t; typedef unsigned long size_t; #define NULL 0 // In words #define memcpy(dest, src, n) \ for (size_t i = 0; i < n; ++i) { \ ((char*)dest)[i] = ((const char*)src)[i]; \ } // leftrotate function definition #define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (32 - (c)))) static void md5_hash(uint8_t* initial_msg, const size_t initial_len, uint32_t hash[4]) { // Note: All variables are unsigned 32 bit and wrap modulo 2^32 when calculating // r specifies the per-round shift amounts const static uint8_t r[] = { 7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21, }; // Use binary integer part of the sines of integers (in radians) as constants// Initialize variables: uint32_t k[64]; for (int i = 0; i < 64; ++i) { double f = 1.0 + i; asm("fsin;\n\t" "fabs;\n\t" : "=t"(f) : "0"(f)); f *= 4294967296.0; k[i] = (uint32_t)f; } // Pre-processing: adding a single 1 bit // append "1" bit to message /* Notice: the input bytes are considered as bits strings, where the first bit is the most significant bit of the byte.[37] */ // Pre-processing: padding with zeros // append "0" bit until message length in bit ≡ 448 (mod 512) // append length mod (2 pow 64) to message const int new_len = ((((initial_len + 8) / 64) + 1) * 64) - 8; // Message (to prepare) uint8_t* msg = initial_msg msg[initial_len] = 128; // write the "1" bit uint32_t bits_len = 8 * initial_len; // note, we append the len memcpy(msg + new_len, &bits_len, 4); // in bits at the end of the buffer // Process the message in successive 512-bit chunks: // for each 512-bit chunk of message: for (int offset = 0; offset < new_len; offset += 64) { // break chunk into sixteen 32-bit words w[j], 0 ≤ j ≤ 15 uint32_t* w = (uint32_t*)(msg + offset); // Initialize hash value for this chunk: uint32_t a = hash[0]; uint32_t b = hash[1]; uint32_t c = hash[2]; uint32_t d = hash[3]; // Main loop: uint32_t i; for (i = 0; i < 64; i++) { uint32_t f, g; if (i < 16) { f = (b & c) | ((~b) & d); g = i; } else if (i < 32) { f = (d & b) | ((~d) & c); g = (5 * i + 1) % 16; } else if (i < 48) { f = b ^ c ^ d; g = (3 * i + 5) % 16; } else { f = c ^ (b | (~d)); g = (7 * i) % 16; } uint32_t temp = d; d = c; c = b; b = b + LEFTROTATE((a + f + k[i] + w[g]), r[i / 4 / 4 * 4 + i % 4]); a = temp; } // Add this chunk's hash to result so far: hash[0] += a; hash[1] += b; hash[2] += c; hash[3] += d; } } static char xdigits(char c) { if ((c >= 0) && (c <= 9)) { return '0' + c; } return 'a' + (c - 10); } void _start(void) __attribute__((noreturn)); void _start(void) { uint32_t hash[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}; char* addr = (char*)0x400000; const size_t len = 688; md5_hash(addr, len, hash); unsigned char* md = (unsigned char*)&hash[0]; char buff[32]; for (int i = 0; i < 32; ++i) { unsigned x = md[(i >> 1)]; x >>= (i & 1 ? 0 : 4); buff[i] = xdigits(x & 15); } write(1, &buff[0], sizeof(buff)); _exit(0); } |
---|
1 2 3 4 5 6 7 8 9 10 11 12 13 | CFLAGS="-static -Os -Wl,--omagic -D__RT0_WITH_FASTER_SYSCALL__ -ffunction-sections -Wl,--gc-sections -nostartfiles -nodefaultlibs -nostdlib -nostdinc -Wl,--build-id=none -fomit-frame-pointer -fno-stack-protector -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-unroll-loops -fmerge-all-constants -fno-ident -mfpmath=sse -mfancy-math-387 -finline-functions-called-once -I ./ " gcc $CFLAGS print_my_md5.c -S -fverbose-asm gcc $CFLAGS print_my_md5.s -o print_my_md5 strip -R .comment print_my_md5 strip -R .bss print_my_md5 ls -l ./print_my_md5 ./ELFkickers/sstrip/sstrip print_my_md5 ./print_my_md5 echo md5sum ./print_my_md5 ls -l ./print_my_md5 |
---|