Loading... # 阿里云CTF 2025 复现 ## flag-LS 给了一个vsix文件,查询可知是vscode插件,vscode安装下,然后打开flag-LS目录,发现 * input.flag里是空的,输入内容会被做各种加密 * 打开其他的插件就会崩溃 * 测试发现加密可以选择algo1、algo2、algo3 * algo1:Base58+一个字符 * algo2:移位+一个字符 * algo3:reverse+一个字符 接着去flag-LS.exe搜索Base58表,定位编码函数;还可以搜索下algo,定位到一个函数 在他的下方找到了rot实现,发现每轮加密都会做base58、rot、reverse,然后从中选取一种加密,所以每次加密都会增加移位数(qword_692320) ~~~c++ void __fastcall sub_4CE320(__int64 a1, __int64 a2, __int64 a3, char a4) { // [COLLAPSED LOCAL DECLARATIONS. PRESS NUMPAD "+" TO EXPAND] if ( (unsigned __int64)&retaddr <= *(_QWORD *)(v6 + 16) ) sub_4631E0(); v32 = v4; if ( a4 ) ++qword_692320; v33 = v5; v7 = runtime_makeslice(); v8 = 0LL; v9 = v5; v10 = 0LL; while ( v33 > v8 ) { v26 = v9; v29 = v7; v11 = *(unsigned __int8 *)(v32 + v8); if ( (unsigned int)v11 >= 0x80 ) { v11 = runtime_decoderune(); v12 = v33; } else { v12 = v8 + 1; } if ( (unsigned int)(v11 - 97) <= 0x19 ) v11 = qword_692320 + v11 - 26 * ((v11 + (int)qword_692320 - 97) / 26); if ( (unsigned int)(v11 - 65) <= 0x19 ) v11 = qword_692320 + v11 - 26 * ((v11 + (int)qword_692320 - 65) / 26); v28 = v12; if ( (unsigned int)(v11 - 48) <= 9 ) v11 = qword_692320 + v11 - 10 * (((int)((unsigned __int64)(3435973837LL * (v11 + (int)qword_692320 - 48)) >> 32) >> 3) - ((v11 + (int)qword_692320 - 48) >> 31)); v27 = v10 + 1; v13 = v11; v14 = runtime_intstring(); v15 = v26; v16 = v27; if ( v26 < v27 ) { v30 = v14; runtime_growslice(v21, v22, v23, v24, v25); v17 = v18; v16 = v27; v14 = v30; v13 = v11; } else { v17 = v29; } v19 = 16 * (v16 - 1); *(_QWORD *)(v17 + v19 + 8) = v13; if ( dword_706850 ) { v14 = sub_465120(); *v20 = v14; v20[1] = *(_QWORD *)(v17 + v19); } *(_QWORD *)(v17 + v19) = v14; v7 = v17; v10 = v16; v9 = v15; v8 = v28; } strings_Join(v21, v22, v23, v24, v25); } ~~~ 感觉硬猜也可以,这个字符基本确认是md5的十六进制摘要第一个字符,所以都是0-9a-f  my.flag给了密文,需要倒推回明文flag 解密代码如下 ~~~python from base58 import b58decode from hashlib import md5 import re cipher = "6JJgKGNErps8zJM93aJpL2AhqXbjUTTDkWwf3UVuz3GwFu1jkonQVhsN9h39QVuzTFSDA3YGjTtDyGXjF1ZwwpoBKab2vvCf6kdb1Ms9Ams2Hri9ppzomNKBD5ScoRwszTnqTZkEBw46XfLJmsnirLr3sJ3SCi26Fa36Eq3sq4bNCAtG8yAMpqxfsp7ob81Qwt6e2WtCuLUepEqN8eew79Naq5bRXWQhr9XCtBmPuiSSdNERh3p6XbARTnP9FfcvMivjhRquSw3TXvzAYiahsdBsPtFR5KLVsXtsGcXbReWowvu5GMt6Y3" BASE58_REGEX = re.compile(r'^[1-9A-HJ-NP-Za-km-z]+$') def is_base58(s): return bool(BASE58_REGEX.fullmatch(s)) def rot(s, shift): res = [] for c in s: o = ord(c) if 65 <= o <= 90: res.append(chr((o - 65 - shift) % 26 + 65)) elif 97 <= o <= 122: res.append(chr((o - 97 - shift) % 26 + 97)) elif 48 <= o <= 57: res.append(chr((o - 48 - shift) % 10 + 48)) else: res.append(c) return ''.join(res) def dfs(s, flag, shift): if "aliyunctf" in flag: print(flag) return if len(flag) == 47: return if md5(s[:-1].encode()).hexdigest()[0] != s[-1]: return s = s[:-1] # 去除校验位 # algo1 reverse if is_base58(s): try: tmp = b58decode(s).decode() if tmp.isprintable(): dfs(tmp[:-1], tmp[-1]+flag, shift-1) except: tmp = "" # algo2 reverse tmp = rot(s, shift) dfs(tmp[:-1], tmp[-1]+flag, shift-1) # algo3 reverse tmp = s[::-1] dfs(tmp[:-1], tmp[-1]+flag, shift-1) return dfs(cipher, "", 12+47) ~~~ ## babygame 游戏引擎bevy,这个题很明显不是出题人自己写的游戏,所以可以github搜索下,我搜了bevy+一个音频名就搜到了[项目](https://github.com/PraxTube/tsumi) rust符号看的晕,能看出来里面塞了一些tea类型加密,但是源码读不动 ## easy-cuda 直接导出ptx汇编代码 cuobjdump -ptx .\easy_cuda > .\cuda.txt 先看下主函数 ~~~c++ int __fastcall main(int argc, const char **argv, const char **envp) { int v4; // [rsp+Ch] [rbp-24h] FILE *stream; // [rsp+18h] [rbp-18h] void *s; // [rsp+20h] [rbp-10h] FILE *v7; // [rsp+28h] [rbp-8h] stream = fopen("flag", "rb"); fseek(stream, 0LL, 2); v4 = ftell(stream); fseek(stream, 0LL, 0); if ( (_BYTE)v4 ) v4 += 256 - v4 % 256; s = malloc(v4); memset(s, 0, v4); fread(s, 1uLL, v4, stream); fclose(stream); printf("len: %d\n", v4); cuda_encrypt((unsigned __int8 *)s, v4, 0xACu); v7 = fopen("flag_enc", "wb"); fwrite(s, 1uLL, v4, v7); fclose(v7); free(s); return 0; } ~~~ 读取了flag做了加密得到flag_enc,cuda_encrypt如下 ~~~c++ unsigned __int64 __fastcall cuda_encrypt(unsigned __int8 *a1, int a2, unsigned __int8 a3) { int v3; // eax unsigned __int8 *v6; // [rsp+18h] [rbp-28h] BYREF __int64 v7; // [rsp+20h] [rbp-20h] BYREF unsigned int v8; // [rsp+28h] [rbp-18h] __int64 v9; // [rsp+2Ch] [rbp-14h] BYREF unsigned int v10; // [rsp+34h] [rbp-Ch] unsigned __int64 v11; // [rsp+38h] [rbp-8h] v11 = __readfsqword(0x28u); cudaMalloc<unsigned char>(&v6, a2); cudaMemcpy(v6, a1, a2, 1LL); dim3::dim3((dim3 *)&v9, 0x100u, 1u, 1u); v3 = a2 + 255; if ( a2 + 255 < 0 ) v3 = a2 + 510; dim3::dim3((dim3 *)&v7, v3 >> 8, 1u, 1u); if ( !(unsigned int)_cudaPushCallConfiguration(v7, v8, v9, v10, 0LL, 0LL) ) encrypt_kernel(v6, a3); cudaMemcpy(a1, v6, a2, 2LL); cudaFree(v6); return v11 - __readfsqword(0x28u); } __int64 __fastcall encrypt_kernel(unsigned __int8 *a1, unsigned __int8 a2) { return __device_stub__Z14encrypt_kernelPhh(a1, a2); } unsigned __int64 __fastcall __device_stub__Z14encrypt_kernelPhh(unsigned __int8 *a1, char a2) { char v3; // [rsp+4h] [rbp-5Ch] BYREF unsigned __int8 *v4; // [rsp+8h] [rbp-58h] BYREF int v5; // [rsp+14h] [rbp-4Ch] __int64 v6; // [rsp+18h] [rbp-48h] BYREF __int64 v7; // [rsp+20h] [rbp-40h] BYREF __int64 v8; // [rsp+28h] [rbp-38h] BYREF int v9; // [rsp+30h] [rbp-30h] __int64 v10; // [rsp+34h] [rbp-2Ch] BYREF int v11; // [rsp+3Ch] [rbp-24h] _QWORD v12[3]; // [rsp+40h] [rbp-20h] BYREF unsigned __int64 v13; // [rsp+58h] [rbp-8h] v4 = a1; v3 = a2; v13 = __readfsqword(0x28u); v12[0] = &v4; v12[1] = &v3; v5 = 2; __device_stub__Z14encrypt_kernelPhh(unsigned char *,unsigned char)::__f = (__int64)encrypt_kernel; v8 = 0x100000001LL; v9 = 1; v10 = 0x100000001LL; v11 = 1; if ( !(unsigned int)_cudaPopCallConfiguration(&v8, &v10, &v6, &v7) ) cudaLaunchKernel<char>((unsigned int)encrypt_kernel, v8, v9, v10, v11, (unsigned int)v12, v6, v7); return v13 - __readfsqword(0x28u); } ~~~ 开始读ptx的_Z14encrypt_kernelPhh * 看到T[256]、RT[256]等常量,以及一些str字符串(giftx:),可以关注到调用这些str的位置大概是不同加密分界线,根据数值可以知道有五种加密,顺序是1-5 * gift1:关注到 ~~~ mov.u64 %SPL, __local_depot0; cvta.local.u64 %SP, %SPL; ld.param.u8 %rs12, [_Z14encrypt_kernelPhh_param_1]; // 0xAC ld.param.u64 %rd19, [_Z14encrypt_kernelPhh_param_0]; // flag字节 cvta.to.global.u64 %rd1, %rd19; add.u64 %rd20, %SP, 0; add.u64 %rd2, %SPL, 0; mov.u32 %r1, %ntid.x; mov.u32 %r54, %ctaid.x; mul.lo.s32 %r2, %r54, %r1; mov.u32 %r3, %tid.x; add.s32 %r4, %r2, %r3; setp.ge.u32 %p1, %r3, %r1; cvt.s64.s32 %rd21, %r4; add.s64 %rd3, %rd1, %rd21; @%p1 bra $L__BB0_12; ld.global.u8 %rs13, [%rd3]; // flag地址+ctaid.x*ntid.x+tid.x cvt.u16.u32 %rs14, %r4; mul.lo.s16 %rs15, %rs14, 73; // (ctaid.x*ntid.x+tid.x)*73 add.s16 %rs16, %rs15, %rs12; // (ctaid.x*ntid.x+tid.x)*73+0xAC xor.b16 %rs17, %rs13, %rs16; // tmp=[flag+ctaid.x*ntid.x+tid.x]^((ctaid.x*ntid.x+tid.x)*73+0xAC) and.b16 %rs18, %rs17, 240; shr.u16 %rs19, %rs18, 4; shl.b16 %rs20, %rs17, 4; or.b16 %rs58, %rs19, %rs20; // tmp=(tmp<<4)|(tmp>>4) mov.u32 %r242, 0; // cnt=0 mov.u64 %rd24, T; $L__BB0_2: cvt.u64.u16 %rd22, %rs58; and.b64 %rd23, %rd22, 255; add.s64 %rd25, %rd24, %rd23; ld.const.u8 %rs21, [%rd25]; // T[tmp&0xFF] shr.u16 %rs22, %rs21, 4; shl.b16 %rs23, %rs21, 4; or.b16 %rs24, %rs22, %rs23; // tmp=(T[tmp&0xFF]<<4)|(T[tmp&0xFF]>>4) cvt.u16.u32 %rs25, %r242; xor.b16 %rs58, %rs24, %rs25; // tmp^cnt add.s32 %r242, %r242, 1; // cnt++ setp.lt.u32 %p2, %r242, 10485760; // 这里查询指令可知 %p2=(cnt<10485760) @%p2 bra $L__BB0_2; // 满足则跳转 mov.u32 %r243, 0; $L__BB0_4: cvt.u64.u16 %rd26, %rs58; // 同上一个块,所以重复五次 and.b64 %rd27, %rd26, 255; add.s64 %rd29, %rd24, %rd27; ld.const.u8 %rs26, [%rd29]; shr.u16 %rs27, %rs26, 4; shl.b16 %rs28, %rs26, 4; or.b16 %rs29, %rs27, %rs28; cvt.u16.u32 %rs30, %r243; xor.b16 %rs58, %rs29, %rs30; add.s32 %r243, %r243, 1; setp.lt.u32 %p3, %r243, 10485760; @%p3 bra $L__BB0_4; mov.u32 %r244, 0; $L__BB0_6: cvt.u64.u16 %rd30, %rs58; and.b64 %rd31, %rd30, 255; add.s64 %rd33, %rd24, %rd31; ld.const.u8 %rs31, [%rd33]; shr.u16 %rs32, %rs31, 4; shl.b16 %rs33, %rs31, 4; or.b16 %rs34, %rs32, %rs33; cvt.u16.u32 %rs35, %r244; xor.b16 %rs58, %rs34, %rs35; add.s32 %r244, %r244, 1; setp.lt.u32 %p4, %r244, 10485760; @%p4 bra $L__BB0_6; mov.u32 %r245, 0; $L__BB0_8: cvt.u64.u16 %rd34, %rs58; and.b64 %rd35, %rd34, 255; add.s64 %rd37, %rd24, %rd35; ld.const.u8 %rs36, [%rd37]; shr.u16 %rs37, %rs36, 4; shl.b16 %rs38, %rs36, 4; or.b16 %rs39, %rs37, %rs38; cvt.u16.u32 %rs40, %r245; xor.b16 %rs58, %rs39, %rs40; add.s32 %r245, %r245, 1; setp.lt.u32 %p5, %r245, 10485760; @%p5 bra $L__BB0_8; mov.u32 %r246, 0; $L__BB0_10: cvt.u64.u16 %rd38, %rs58; and.b64 %rd39, %rd38, 255; add.s64 %rd41, %rd24, %rd39; ld.const.u8 %rs41, [%rd41]; shr.u16 %rs42, %rs41, 4; shl.b16 %rs43, %rs41, 4; or.b16 %rs44, %rs42, %rs43; cvt.u16.u32 %rs45, %r246; xor.b16 %rs58, %rs44, %rs45; add.s32 %r246, %r246, 1; setp.lt.u32 %p6, %r246, 10485760; @%p6 bra $L__BB0_10; st.global.u8 [%rd3], %rs58; // 最终值存储在这里,[flag地址+ctaid.x*ntid.x+tid.x],相当于覆盖回去 $L__BB0_12: bar.sync 0; setp.ne.s32 %p7, %r4, 0; @%p7 bra $L__BB0_17; ~~~ 分析完汇编写出伪代码 ~~~python for i in range(len(flag)): tmp = flag[i]^(i*73+0xAC) tmp = ((tmp<<4)|(tmp>>4))&0xFF for j in range(10485760*5): tmp = ((T[tmp]<<4)|(T[tmp]>>4))&0xFF tmp ^= j%10485760 ~~~ * gift2: ~~~ setp.eq.s32 %p10, %r1, 0; // %p10=(%r1==0) setp.ne.s32 %p11, %r3, 0; // %p11=(%r3!=0) or.pred %p12, %p11, %p10; @%p12 bra $L__BB0_20; cvt.s64.s32 %rd51, %r2; // ctaid.x*ntid.x add.s64 %rd98, %rd1, %rd51; mov.u32 %r248, 0; // cnt=0 $L__BB0_19: add.s32 %r248, %r248, 1; // cnt++ rem.u32 %r66, %r248, %r1; // cnt%ntid.x add.s32 %r67, %r66, %r2; // cnt%ntid.x+ctaid.x*ntid.x cvt.s64.s32 %rd52, %r67; add.s64 %rd53, %rd1, %rd52; ld.global.u8 %rs46, [%rd98]; // [flag+ctaid.x*ntid.x] xor.b16 %rs47, %rs46, %rs12; // [flag+ctaid.x*ntid.x]^0xAC ld.global.u8 %rs48, [%rd53]; // [flag+cnt%ntid.x+ctaid.x*ntid.x] xor.b16 %rs49, %rs47, %rs48; // [flag+ctaid.x*ntid.x]^0xAC^[flag+cnt%ntid.x+ctaid.x*ntid.x] st.global.u8 [%rd98], %rs49; // [flag+ctaid.x*ntid.x]^=0xAC^[flag+cnt%ntid.x+ctaid.x*ntid.x] add.s64 %rd98, %rd98, 1; // 相当于往后一位,和cnt同步但落后一字节 setp.lt.u32 %p13, %r248, %r1; // cnt<ntid.x @%p13 bra $L__BB0_19; ~~~ 分析完汇编写出伪代码(注意分块) ~~~python for i in range(0, len(flag), 256): for j in range(256): flag[i+j]^=flag[i+(j+1)%256]^0xAC ~~~ * gift3: ~~~ $L__BB0_25: bar.sync 0; and.b32 %r73, %r3, 1; // tid.x&1 setp.eq.b32 %p18, %r73, 1; // tid.x&1==1 add.s32 %r74, %r3, 1; // tid.x+1 rem.u32 %r75, %r74, %r1; // (tid.x+1)%ntid.x add.s32 %r76, %r75, %r2; // (tid.x+1)%ntid.x+ctaid.x*ntid.x cvt.s64.s32 %rd63, %r76; add.s64 %rd11, %rd1, %rd63; or.pred %p19, %p1, %p18; // (tid.x>=ntid.x)|(tid.x&1==1),也就是奇数才跳转 @%p19 bra $L__BB0_27; ld.global.u8 %rs50, [%rd3]; ld.global.u8 %rs51, [%rd11]; // [flag+(tid.x+1)%ntid.x+ctaid.x*ntid.x] st.global.u8 [%rd3], %rs51; st.global.u8 [%rd11], %rs50; // 这里做了交换 ~~~ 不是很好懂,可以尝试构造一个flag,然后看他的打印结果 ~~~ len: 512 gift1: 3c ea c8 1f 26 85 b9 fa 10 f6 d5 0e a2 e2 9f 8f b8 24 d8 8a 0f a6 4e 69 e3 d0 53 1d 86 63 18 34 9a 37 2a 6c c4 ce 00 aa c9 9d 1e 91 f1 75 49 6e 0b ef 48 77 12 fd f8 17 9c 27 b5 b7 c3 6a 2e 9b 16 e6 0a 8d 29 11 ae 01 84 f0 d7 13 ad 98 02 a1 64 b1 1b de 74 92 95 b4 87 81 a5 33 47 a8 ca 93 a7 5d c6 e9 46 54 3e ff bc 45 af ee 20 f5 c7 72 09 55 66 cd db df d2 e5 14 73 62 da 31 2b a9 f7 7d a0 3f 03 38 4d cc 2d f4 dc e4 cf 25 60 88 9e 5b e7 b6 4c fb e0 f2 2f 7f b0 50 79 94 4a 71 15 1c 6d 7c 7b 57 c0 c2 5e 61 e8 78 6f 23 90 b2 43 d3 19 2c fe 36 8c 6b 67 7a 80 4b ab 3d 30 0d 05 4f 5f ed a4 70 bf d1 58 f9 99 08 82 bb 5c c5 7e 56 1a 52 96 eb 76 ba c1 35 bd 28 5a 8b 97 44 22 06 41 ac be fc 59 d4 32 ec cb dd 83 51 65 a3 d6 f3 21 40 d9 b3 68 07 8e 42 04 3b 3a 0c e1 89 39 gift2: 7a 8e 7b 95 0f 90 ef 46 4a 8f 77 00 ec d1 bc 9b 30 50 fe 29 05 44 8b 26 9f 2f e2 37 49 d7 80 02 01 b1 ea 04 a6 62 06 cf f8 2f 23 cc 28 90 8b c9 48 0b 93 c9 43 a9 43 27 17 3e ae d8 05 e8 19 21 5c 40 2b 08 94 13 03 29 d8 8b 68 12 99 36 0f 69 79 06 69 06 4a ab 8d 9f aa 88 3a d8 43 ce f5 98 56 37 83 03 be c6 6d ef 55 46 ed 62 79 9e 19 d7 f0 9f 07 ba a8 a1 9b 5d cb bd 14 47 b6 2e f2 26 71 33 90 97 d9 2d 4d 75 84 94 87 46 e9 44 ba 69 10 fd 56 1b b7 be 71 fc 63 4c 85 41 72 97 c8 a5 dd bd ab 80 3b ae 30 93 25 3c bb e0 1f 8e 5d 3c 66 99 7e 64 16 4b a0 b1 56 67 4c 3a a1 91 a4 e6 bc 1e e5 78 63 c2 25 0d cc 3d 26 95 4b 35 17 84 e0 e4 68 d1 31 60 d7 58 24 39 de 7d b0 7f ca 88 eb 41 be ee 09 21 4a 72 8b ba f2 7e 98 6a d9 89 7e cd 35 c6 77 c3 25 60 ea 93 ad 9a 41 c4 1c ef gift3: 8e 7a 95 7b 90 0f 46 ef 8f 4a 00 77 d1 ec 9b bc 50 30 29 fe 44 05 26 8b 2f 9f 37 e2 d7 49 02 80 b1 01 04 ea 62 a6 cf 06 2f f8 cc 23 90 28 c9 8b 0b 48 c9 93 a9 43 27 43 3e 17 d8 ae e8 05 21 19 40 5c 08 2b 13 94 29 03 8b d8 12 68 36 99 69 0f 06 79 06 69 ab 4a 9f 8d 88 aa d8 3a ce 43 98 f5 37 56 03 83 c6 be ef 6d 46 55 62 ed 9e 79 d7 19 9f f0 ba 07 a1 a8 5d 9b bd cb 47 14 2e b6 26 f2 33 71 97 90 2d d9 75 4d 94 84 46 87 44 e9 69 ba fd 10 1b 56 be b7 fc 71 4c 63 41 85 97 72 a5 c8 bd dd 80 ab ae 3b 93 30 3c 25 e0 bb 8e 1f 3c 5d 99 66 64 7e 4b 16 b1 a0 67 56 3a 4c 91 a1 e6 a4 1e bc 78 e5 c2 63 0d 25 3d cc 95 26 35 4b 84 17 e4 e0 d1 68 60 31 58 d7 39 24 7d de 7f b0 88 ca 41 eb ee be 21 09 72 4a ba 8b 7e f2 6a 98 89 d9 cd 7e c6 35 c3 77 60 25 93 ea 9a ad c4 41 ef 1c gift4: 1c 95 7a 90 7b 46 0f 8f ef 00 4a d1 77 9b ec 50 bc 29 30 44 fe 26 05 2f 8b 37 9f d7 e2 02 49 b1 80 04 01 62 ea cf a6 2f 06 cc f8 90 23 c9 28 0b 8b c9 48 a9 93 27 43 3e 43 d8 17 e8 ae 21 05 40 19 08 5c 13 2b 29 94 8b 03 12 d8 36 68 69 99 06 0f 06 79 ab 69 9f 4a 88 8d d8 aa ce 3a 98 43 37 f5 03 56 c6 83 ef be 46 6d 62 55 9e ed d7 79 9f 19 ba f0 a1 07 5d a8 bd 9b 47 cb 2e 14 26 b6 33 f2 97 71 2d 90 75 d9 94 4d 46 84 44 87 69 e9 fd ba 1b 10 be 56 fc b7 4c 71 41 63 97 85 a5 72 bd c8 80 dd ae ab 93 3b 3c 30 e0 25 8e bb 3c 1f 99 5d 64 66 4b 7e b1 16 67 a0 3a 56 91 4c e6 a1 1e a4 78 bc c2 e5 0d 63 3d 25 95 cc 35 26 84 4b e4 17 d1 e0 60 68 58 31 39 d7 7d 24 7f de 88 b0 41 ca ee eb 21 be 72 09 ba 4a 7e 8b 6a f2 89 98 cd d9 c6 7e c3 35 60 77 93 25 9a ea c4 ad ef 41 8e gift5: 9d f5 75 e3 92 81 57 b4 f3 06 53 b1 ba 75 9a 99 23 29 82 7c 2f 3c df 86 5d 56 bd df 34 a2 02 59 29 92 a5 99 a7 44 61 5a 3e c3 0f a2 9a 91 a0 15 1c bf 4c 65 9e 13 ec 14 5d f6 e1 76 ee 63 e5 27 ee 8d 33 13 a3 2c 3b 5c a1 39 45 b1 bc 37 28 14 e9 ba 12 f9 0e 46 4f ff 49 53 32 3d 1b 88 e4 d4 6e 9f 17 9c 44 2c 71 08 2e 5c 77 0b df b7 12 dc db 3f e1 ed cb 1f 4e 21 11 5c ac 5c 92 41 1b 81 f1 8e ae 42 05 50 24 da 20 f7 0e 8a 3f 6f 34 c6 e9 37 4f 39 2c 1c 2b 86 96 e4 69 4f c3 7a 36 38 78 83 8c ca 48 37 96 6c 9e ca 5c 34 fa 13 83 9c f8 b8 08 f0 ef b9 d1 3a 66 7f 08 cc 92 56 93 e4 4f 85 fe 34 a3 08 a9 8b 4c 40 91 95 7e dc 57 2e d1 1b b5 77 43 de c8 34 b7 b1 cb 07 4f e9 35 6e ce f1 3e 90 92 c3 ed 95 c6 1a 24 f5 56 d1 84 37 a1 96 70 f2 5b ca 84 6a b5 52 22 74 6b 2b 53 8d ~~~ 分析可知,gift3做的是每两个交换一次 * gift4:和上面类似 ~~~ $L__BB0_32: bar.sync 0; setp.lt.s32 %p24, %r3, 1; or.pred %p25, %p24, %p1; shr.u32 %r82, %r3, 31; add.s32 %r83, %r3, %r82; and.b32 %r84, %r83, -2; sub.s32 %r85, %r3, %r84; setp.ne.s32 %p26, %r85, 1; or.pred %p27, %p25, %p26; @%p27 bra $L__BB0_34; ld.global.u8 %rs52, [%rd3]; ld.global.u8 %rs53, [%rd11]; st.global.u8 [%rd3], %rs53; st.global.u8 [%rd11], %rs52; $L__BB0_34: bar.sync 0; @%p7 bra $L__BB0_39; ~~~ 分析可知,从第一个开始每两个交换一次,第一个和最后一个交换 * gift5:很明显TEA特征 ~~~ ld.global.u32 %r259, [%rd3+4]; ld.global.u32 %r260, [%rd3]; mov.u32 %r258, 0; mov.u32 %r257, -239350328; mov.u32 %r256, 387276957; mov.u32 %r255, 2027808484; mov.u32 %r254, -626627285; mov.u32 %r253, 1013904242; mov.u32 %r252, -1640531527; $L__BB0_41: shl.b32 %r99, %r259, 4; add.s32 %r100, %r99, -1556008596; shr.u32 %r101, %r259, 5; add.s32 %r102, %r101, -939442524; xor.b32 %r103, %r102, %r100; add.s32 %r104, %r252, %r259; xor.b32 %r105, %r103, %r104; add.s32 %r106, %r105, %r260; shl.b32 %r107, %r106, 4; add.s32 %r108, %r107, 1013904242; add.s32 %r109, %r106, %r252; xor.b32 %r110, %r108, %r109; shr.u32 %r111, %r106, 5; add.s32 %r112, %r111, 338241895; xor.b32 %r113, %r110, %r112; add.s32 %r114, %r113, %r259; shl.b32 %r115, %r114, 4; add.s32 %r116, %r115, -1556008596; add.s32 %r117, %r253, %r114; shr.u32 %r118, %r114, 5; add.s32 %r119, %r118, -939442524; xor.b32 %r120, %r119, %r116; xor.b32 %r121, %r120, %r117; add.s32 %r122, %r121, %r106; shl.b32 %r123, %r122, 4; add.s32 %r124, %r123, 1013904242; add.s32 %r125, %r122, %r253; xor.b32 %r126, %r124, %r125; shr.u32 %r127, %r122, 5; add.s32 %r128, %r127, 338241895; xor.b32 %r129, %r126, %r128; add.s32 %r130, %r129, %r114; shl.b32 %r131, %r130, 4; add.s32 %r132, %r131, -1556008596; add.s32 %r133, %r254, %r130; shr.u32 %r134, %r130, 5; add.s32 %r135, %r134, -939442524; xor.b32 %r136, %r135, %r132; xor.b32 %r137, %r136, %r133; add.s32 %r138, %r137, %r122; shl.b32 %r139, %r138, 4; add.s32 %r140, %r139, 1013904242; add.s32 %r141, %r138, %r254; xor.b32 %r142, %r140, %r141; shr.u32 %r143, %r138, 5; add.s32 %r144, %r143, 338241895; xor.b32 %r145, %r142, %r144; add.s32 %r146, %r145, %r130; shl.b32 %r147, %r146, 4; add.s32 %r148, %r147, -1556008596; add.s32 %r149, %r255, %r146; shr.u32 %r150, %r146, 5; add.s32 %r151, %r150, -939442524; xor.b32 %r152, %r151, %r148; xor.b32 %r153, %r152, %r149; add.s32 %r154, %r153, %r138; shl.b32 %r155, %r154, 4; add.s32 %r156, %r155, 1013904242; add.s32 %r157, %r154, %r255; xor.b32 %r158, %r156, %r157; shr.u32 %r159, %r154, 5; add.s32 %r160, %r159, 338241895; xor.b32 %r161, %r158, %r160; add.s32 %r162, %r161, %r146; shl.b32 %r163, %r162, 4; add.s32 %r164, %r163, -1556008596; add.s32 %r165, %r256, %r162; shr.u32 %r166, %r162, 5; add.s32 %r167, %r166, -939442524; xor.b32 %r168, %r167, %r164; xor.b32 %r169, %r168, %r165; add.s32 %r170, %r169, %r154; shl.b32 %r171, %r170, 4; add.s32 %r172, %r171, 1013904242; add.s32 %r173, %r170, %r256; xor.b32 %r174, %r172, %r173; shr.u32 %r175, %r170, 5; add.s32 %r176, %r175, 338241895; xor.b32 %r177, %r174, %r176; add.s32 %r178, %r177, %r162; shl.b32 %r179, %r178, 4; add.s32 %r180, %r179, -1556008596; add.s32 %r181, %r257, -1013904242; add.s32 %r182, %r181, %r178; shr.u32 %r183, %r178, 5; add.s32 %r184, %r183, -939442524; xor.b32 %r185, %r184, %r180; xor.b32 %r186, %r185, %r182; add.s32 %r187, %r186, %r170; shl.b32 %r188, %r187, 4; add.s32 %r189, %r188, 1013904242; add.s32 %r190, %r187, %r181; xor.b32 %r191, %r189, %r190; shr.u32 %r192, %r187, 5; add.s32 %r193, %r192, 338241895; xor.b32 %r194, %r191, %r193; add.s32 %r195, %r194, %r178; shl.b32 %r196, %r195, 4; add.s32 %r197, %r196, -1556008596; add.s32 %r198, %r257, 1640531527; add.s32 %r199, %r198, %r195; shr.u32 %r200, %r195, 5; add.s32 %r201, %r200, -939442524; xor.b32 %r202, %r201, %r197; xor.b32 %r203, %r202, %r199; add.s32 %r204, %r203, %r187; shl.b32 %r205, %r204, 4; add.s32 %r206, %r205, 1013904242; add.s32 %r207, %r204, %r198; xor.b32 %r208, %r206, %r207; shr.u32 %r209, %r204, 5; add.s32 %r210, %r209, 338241895; xor.b32 %r211, %r208, %r210; add.s32 %r212, %r211, %r195; shl.b32 %r213, %r212, 4; add.s32 %r214, %r213, -1556008596; add.s32 %r215, %r257, %r212; shr.u32 %r216, %r212, 5; add.s32 %r217, %r216, -939442524; xor.b32 %r218, %r217, %r214; xor.b32 %r219, %r218, %r215; add.s32 %r260, %r219, %r204; shl.b32 %r220, %r260, 4; add.s32 %r221, %r220, 1013904242; add.s32 %r222, %r260, %r257; xor.b32 %r223, %r221, %r222; shr.u32 %r224, %r260, 5; add.s32 %r225, %r224, 338241895; xor.b32 %r226, %r223, %r225; add.s32 %r259, %r226, %r212; add.s32 %r257, %r257, -239350328; add.s32 %r256, %r256, -239350328; add.s32 %r255, %r255, -239350328; add.s32 %r254, %r254, -239350328; add.s32 %r253, %r253, -239350328; add.s32 %r252, %r252, -239350328; add.s32 %r258, %r258, 8; setp.ne.s32 %p34, %r258, 10485760; @%p34 bra $L__BB0_41; st.global.u32 [%rd3], %r260; st.global.u32 [%rd3+4], %r259; ~~~ 分析完汇编写出伪代码 ~~~python totals = [-1640531527, 1013904242, -626627285, 2027808484, 387276957, -239350328-1013904242, -239350328+1640531527, -239350328] for i in range(10485760): for j in range(8): v1 += (((v0<<4)-1556008596)^((v0>>5)-939442524))^(v0+totals[j]) v0 += (((v1<<4)+1013904242)^((v1>>5)+338241895))^(v1+totals[j]) totals = [total-239350328 for total in totals] ~~~ 注意末尾还有个异或id 先写个简单python脚本逆向256字节大小的flag\_enc,然后来让gpt加速 ~~~python from ctypes import c_uint32 with open("flag_enc", "rb") as f: data = f.read() data = [i^data[i] for i in range(256)] print(list(map(hex, data))) data = bytes(data) def dec_tea(data): v0, v1 = c_uint32(data[0]), c_uint32(data[1]) totals = [c_uint32(i-239350328*10485760//8) for i in [-1640531527, 1013904242, -626627285, 2027808484, 387276957, -239350328-1013904242, -239350328+1640531527, -239350328]][::-1] for i in range(10485760//8): for total in totals: total.value += 239350328 for j in range(8): v1.value -= (((v0.value<<4)+1013904242)^((v0.value>>5)+338241895))^(v0.value+totals[j].value) v0.value -= (((v1.value<<4)-1556008596)^((v1.value>>5)-939442524))^(v1.value+totals[j].value) return v0.value, v1.value # rev gift 5 data = [int.from_bytes(data[i:i+4], byteorder="little") for i in range(0, len(data), 4)] for i in range(0, len(data), 2): data[i:i+2] = dec_tea(data[i:i+2]) data = list(b"".join([i.to_bytes(4, byteorder="little") for i in data])) print(list(map(hex, data))) # data = ['0xda', '0x95', '0x7a', '0x90', '0x7b', '0x46', '0xf', '0x8f', '0xef', '0x0', '0x4a', '0xd1', '0x77', '0x9b', '0xec', '0x50', '0xbc', '0x29', '0x30', '0x44', '0xfe', '0x26', '0x5', '0x2f', '0x8b', '0x37', '0x9f', '0xd7', '0xe2', '0x2', '0x49', '0xb1', '0x80', '0x4', '0x1', '0x62', '0xea', '0xcf', '0xa6', '0x2f', '0x6', '0xcc', '0xf8', '0x90', '0x23', '0xc9', '0x28', '0xb', '0x8b', '0xc9', '0x48', '0xa9', '0x93', '0x27', '0x43', '0x3e', '0x43', '0xd8', '0x17', '0xe8', '0xae', '0x21', '0x5', '0x40', '0x19', '0x8', '0x5c', '0x13', '0x2b', '0x29', '0x94', '0x8b', '0x3', '0x12', '0xd8', '0x36', '0x68', '0x69', '0x99', '0x6', '0xf', '0x6', '0x79', '0xab', '0x69', '0x9f', '0x4a', '0x88', '0x8d', '0xd8', '0xaa', '0xce', '0x3a', '0x98', '0x43', '0x37', '0xf5', '0x3', '0x56', '0xc6', '0x83', '0xef', '0xbe', '0x46', '0x6d', '0x62', '0x55', '0x9e', '0xed', '0xd7', '0x79', '0x9f', '0x19', '0xba', '0xf0', '0xa1', '0x7', '0x5d', '0xa8', '0xbd', '0x9b', '0x47', '0xcb', '0x2e', '0x14', '0x26', '0xb6', '0x33', '0xf2', '0x97', '0x71', '0x2d', '0x90', '0x75', '0xd9', '0x94', '0x4d', '0x46', '0x84', '0x44', '0x87', '0x69', '0xe9', '0xfd', '0xba', '0x1b', '0x10', '0xbe', '0x56', '0xfc', '0xb7', '0x4c', '0x71', '0x41', '0x63', '0x97', '0x85', '0xa5', '0x72', '0xbd', '0xc8', '0x80', '0xdd', '0xae', '0xab', '0x93', '0x3b', '0x3c', '0x30', '0xe0', '0x25', '0x8e', '0xbb', '0x3c', '0x1f', '0x99', '0x5d', '0x64', '0x66', '0x4b', '0x7e', '0xb1', '0x16', '0x67', '0xa0', '0x3a', '0x56', '0x91', '0x4c', '0xe6', '0xa1', '0x1e', '0xa4', '0x78', '0xbc', '0xc2', '0xe5', '0xd', '0x63', '0x3d', '0x25', '0x95', '0xcc', '0x35', '0x26', '0x84', '0x4b', '0xe4', '0x17', '0xd1', '0xe0', '0x60', '0x68', '0x58', '0x31', '0x39', '0xd7', '0x7d', '0x24', '0x7f', '0xde', '0x88', '0xb0', '0x41', '0xca', '0xee', '0xeb', '0x21', '0xbe', '0x72', '0x9', '0xba', '0x4a', '0x7e', '0x8b', '0x6a', '0xf2', '0x89', '0x98', '0xcd', '0xd9', '0xc6', '0x7e', '0xc3', '0x35', '0x60', '0x77', '0x93', '0x25', '0x4', '0xea', '0xe4', '0xad', '0x14', '0xc2', '0x8e'] # data = [int(i, 16) for i in data] # rev gift 4 for i in range(1, len(data), 2): data[i], data[(i+1)%256] = data[(i+1)%256], data[i] print(list(map(hex, data))) # rev gift 3 for i in range(0, len(data), 2): data[i], data[i+1] = data[i+1], data[i] print(list(map(hex, data))) # rev gift 2 for i in range(255, -1, -1): data[i] ^= data[(i+1)%256] ^ 0xAC print(list(map(hex, data))) # rev gift 1 RT = [82, 9, 106, 213, 48, 54, 165, 56, 191, 64, 163, 158, 129, 243, 215, 251, 124, 227, 57, 130, 155, 47, 255, 135, 52, 142, 67, 68, 196, 222, 233, 203, 84, 123, 148, 50, 166, 194, 35, 61, 238, 76, 149, 11, 66, 250, 195, 78, 8, 46, 161, 102, 40, 217, 36, 178, 118, 91, 162, 73, 109, 139, 209, 37, 114, 248, 246, 100, 134, 104, 152, 22, 212, 164, 92, 204, 93, 101, 182, 146, 108, 112, 72, 80, 253, 237, 185, 218, 94, 21, 70, 87, 167, 141, 157, 132, 144, 216, 171, 0, 140, 188, 211, 10, 247, 228, 88, 5, 184, 179, 69, 6, 208, 44, 30, 143, 202, 63, 15, 2, 193, 175, 189, 3, 1, 19, 138, 107, 58, 145, 17, 65, 79, 103, 220, 234, 151, 242, 207, 206, 240, 180, 230, 115, 150, 172, 116, 34, 231, 173, 53, 133, 226, 249, 55, 232, 28, 117, 223, 110, 71, 241, 26, 113, 29, 41, 197, 137, 111, 183, 98, 14, 170, 24, 190, 27, 252, 86, 62, 75, 198, 210, 121, 32, 154, 219, 192, 254, 120, 205, 90, 244, 31, 221, 168, 51, 136, 7, 199, 49, 177, 18, 16, 89, 39, 128, 236, 95, 96, 81, 127, 169, 25, 181, 74, 13, 45, 229, 122, 159, 147, 201, 156, 239, 160, 224, 59, 77, 174, 42, 245, 176, 200, 235, 187, 60, 131, 83, 153, 97, 23, 43, 4, 126, 186, 119, 214, 38, 225, 105, 20, 99, 85, 33, 12, 125] for i in range(len(data)): tmp = data[i] for j in range(10485760*5-1, -1, -1): tmp ^= (j%10485760)&0xFF tmp = ((tmp<<4)|(tmp>>4))&0xFF tmp = RT[tmp] tmp = ((tmp<<4)|(tmp>>4))&0xFF data[i] = (tmp ^ (i*73+0xAC)) & 0xFF print(hex(data[i]), end=", ") ~~~ 研究下python优化 * gift1很明显可以查表的 * gift32不要用c\_uint32,开销比较大 归根结底不要用python好了,用C语言+GPT,hhh,给一个AI梭出来的解密代码,cu文件编译下,里面爆破了key实际上可以直接赋值0xAC ~~~c // decrypt_flag.cu // Compile: nvcc -O3 decrypt_flag.cu -o decrypt_flag // Run: ./decrypt_flag flag_enc #include <cuda_runtime.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #define BLK 256 #define NITER (10485760u/8u) // 1,310,720 #define DEC 239350328u #define CUDA_CHECK(x) do { \ cudaError_t err = (x); \ if (err != cudaSuccess) { \ fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__, cudaGetErrorString(err)); \ exit(1); \ } \ } while(0) static inline uint8_t nibswap8(uint8_t x){ return (uint8_t)((x >> 4) | (x << 4)); } // --- host copy of T (AES S-box) --- static const uint8_t T_host[256] = { 99,124,119,123,242,107,111,197,48,1,103,43,254,215,171,118, 202,130,201,125,250,89,71,240,173,212,162,175,156,164,114,192, 183,253,147,38,54,63,247,204,52,165,229,241,113,216,49,21, 4,199,35,195,24,150,5,154,7,18,128,226,235,39,178,117, 9,131,44,26,27,110,90,160,82,59,214,179,41,227,47,132, 83,209,0,237,32,252,177,91,106,203,190,57,74,76,88,207, 208,239,170,251,67,77,51,133,69,249,2,127,80,60,159,168, 81,163,64,143,146,157,56,245,188,182,218,33,16,255,243,210, 205,12,19,236,95,151,68,23,196,167,126,61,100,93,25,115, 96,129,79,220,34,42,144,136,70,238,184,20,222,94,11,219, 224,50,58,10,73,6,36,92,194,211,172,98,145,149,228,121, 231,200,55,109,141,213,78,169,108,86,244,234,101,122,174,8, 186,120,37,46,28,166,180,198,232,221,116,31,75,189,139,138, 112,62,181,102,72,3,246,14,97,53,87,185,134,193,29,158, 225,248,152,17,105,217,142,148,155,30,135,233,206,85,40,223, 140,161,137,13,191,230,66,104,65,153,45,15,176,84,187,22 }; // device constant memory version __constant__ uint8_t T_dev[256]; // -------- device LE helpers -------- __device__ __forceinline__ uint32_t rd_u32_le_dev(const uint8_t *p){ return (uint32_t)p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24); } __device__ __forceinline__ void wr_u32_le_dev(uint8_t *p, uint32_t v){ p[0] = (uint8_t)v; p[1] = (uint8_t)(v >> 8); p[2] = (uint8_t)(v >> 16); p[3] = (uint8_t)(v >> 24); } static inline uint32_t rd_u32_le_host(const uint8_t *p){ return (uint32_t)p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24); } static inline void wr_u32_le_host(uint8_t *p, uint32_t v){ p[0] = (uint8_t)v; p[1] = (uint8_t)(v >> 8); p[2] = (uint8_t)(v >> 16); p[3] = (uint8_t)(v >> 24); } // mixA/mixB (device) __device__ __forceinline__ uint32_t mixA(uint32_t v, uint32_t sum){ return (((v << 4) + (uint32_t)(0u - 1556008596u)) ^ ((v >> 5) + (uint32_t)(0u - 939442524u)) ^ (sum + v)); } __device__ __forceinline__ uint32_t mixB(uint32_t v, uint32_t sum){ return (((v << 4) + 1013904242u) ^ ((v >> 5) + 338241895u) ^ (sum + v)); } // Stage5 inverse (device) __device__ __forceinline__ void stage5_inv_u64(uint32_t &a, uint32_t &b){ uint32_t sub = (uint32_t)((unsigned long long)(NITER - 1u) * (unsigned long long)DEC); uint32_t s252 = 0x9E3779B9u - sub; uint32_t s253 = 1013904242u - sub; uint32_t s254 = (uint32_t)(0u - 626627285u) - sub; uint32_t s255 = 2027808484u - sub; uint32_t s256 = 387276957u - sub; uint32_t s257 = (uint32_t)(0u - 239350328u) - sub; for (uint32_t it = 0; it < NITER; it++){ uint32_t sum257 = s257; uint32_t sum198 = sum257 + 1640531527u; uint32_t sum181 = sum257 - 1013904242u; b -= mixB(a, sum257); a -= mixA(b, sum257); b -= mixB(a, sum198); a -= mixA(b, sum198); b -= mixB(a, sum181); a -= mixA(b, sum181); b -= mixB(a, s256); a -= mixA(b, s256); b -= mixB(a, s255); a -= mixA(b, s255); b -= mixB(a, s254); a -= mixA(b, s254); b -= mixB(a, s253); a -= mixA(b, s253); b -= mixB(a, s252); a -= mixA(b, s252); s252 += DEC; s253 += DEC; s254 += DEC; s255 += DEC; s256 += DEC; s257 += DEC; } } // GPU: keyless inverse (Stage6^-1, Stage5^-1, swaps^-1) __global__ void keyless_inverse(uint8_t *buf){ int tid = threadIdx.x; size_t base = (size_t)blockIdx.x * (size_t)BLK; size_t idx = base + (size_t)tid; // Stage6 inverse: xor idx (same as forward) buf[idx] ^= (uint8_t)idx; __syncthreads(); // Stage5 inverse: each 8 bytes if ((tid & 7) == 0){ uint8_t *p = buf + base + (size_t)tid; uint32_t a = rd_u32_le_dev(p); uint32_t b = rd_u32_le_dev(p + 4); stage5_inv_u64(a, b); wr_u32_le_dev(p, a); wr_u32_le_dev(p + 4, b); } __syncthreads(); // Undo swaps: reverse order of encryption swaps // encryption odd stage: swap (1,2)(3,4)... plus (255,0) if (tid == 255){ uint8_t t = buf[base + 255]; buf[base + 255] = buf[base + 0]; buf[base + 0] = t; } else if ((tid & 1) && tid != 255){ uint8_t t = buf[base + (size_t)tid]; buf[base + (size_t)tid] = buf[base + (size_t)tid + 1]; buf[base + (size_t)tid + 1] = t; } __syncthreads(); // encryption even stage: swap (0,1)(2,3)... if ((tid & 1) == 0 && tid != 255){ uint8_t t = buf[base + (size_t)tid]; buf[base + (size_t)tid] = buf[base + (size_t)tid + 1]; buf[base + (size_t)tid + 1] = t; } __syncthreads(); } // ---------------- CPU fast inverse (bruteforce key) ---------------- static void compose_perm(uint8_t out[256], const uint8_t f[256], const uint8_t g[256]){ for(int i=0;i<256;i++) out[i] = f[g[i]]; } static void perm_pow(uint8_t out[256], const uint8_t base[256], uint32_t exp){ uint8_t result[256], cur[256], tmp[256]; for(int i=0;i<256;i++){ result[i]=(uint8_t)i; cur[i]=base[i]; } while(exp){ if(exp & 1u){ compose_perm(tmp, cur, result); memcpy(result, tmp, 256); } exp >>= 1u; if(exp){ compose_perm(tmp, cur, cur); memcpy(cur, tmp, 256); } } memcpy(out, result, 256); } static void build_stage1_Hinv(uint8_t Hinv[256]){ // one 256-step cycle (counter_low8 runs 0..255) uint8_t cycle[256]; for(int x=0;x<256;x++){ uint8_t s = (uint8_t)x; for(int t=0;t<256;t++){ s = (uint8_t)(nibswap8(T_host[s]) ^ (uint8_t)t); } cycle[x] = s; } // 10485760 = 256*40960. repeated 5 times => cycle^(40960*5) uint8_t pow40960[256]; perm_pow(pow40960, cycle, 40960u); uint8_t total[256], tmp[256]; memcpy(total, pow40960, 256); for(int i=0;i<4;i++){ compose_perm(tmp, pow40960, total); memcpy(total, tmp, 256); } for(int i=0;i<256;i++){ Hinv[ total[i] ] = (uint8_t)i; } } static void stage2_inv_block(uint8_t block[256], uint8_t key){ uint8_t y[256]; memcpy(y, block, 256); uint8_t x[256]; x[255] = (uint8_t)(y[255] ^ key ^ y[0]); for(int i=254;i>=0;i--){ x[i] = (uint8_t)(y[i] ^ key ^ x[i+1]); } memcpy(block, x, 256); } static void stage1_inv_block(uint8_t block[256], size_t base_idx, uint8_t key, const uint8_t Hinv[256]){ for(int i=0;i<256;i++){ uint32_t idx = (uint32_t)(base_idx + (size_t)i); uint8_t t = block[i]; t = Hinv[t]; t = nibswap8(t); t ^= (uint8_t)((idx * 73u + (uint32_t)key) & 0xFFu); block[i] = t; } } static uint8_t *read_file(const char *path, size_t *sz_out){ FILE *fp = fopen(path, "rb"); if(!fp){ perror("fopen"); return NULL; } fseek(fp, 0, SEEK_END); long sz = ftell(fp); fseek(fp, 0, SEEK_SET); if(sz <= 0){ fclose(fp); return NULL; } uint8_t *buf = (uint8_t*)malloc((size_t)sz); if(!buf){ fclose(fp); return NULL; } if(fread(buf, 1, (size_t)sz, fp) != (size_t)sz){ fclose(fp); free(buf); return NULL; } fclose(fp); *sz_out = (size_t)sz; return buf; } static int write_file(const char *path, const uint8_t *buf, size_t sz){ FILE *fp = fopen(path, "wb"); if(!fp){ perror("fopen"); return 0; } if(fwrite(buf, 1, sz, fp) != sz){ fclose(fp); return 0; } fclose(fp); return 1; } static int is_png_header(const uint8_t *p, size_t n){ static const uint8_t sig[8] = {0x89,'P','N','G',0x0D,0x0A,0x1A,0x0A}; if(n < 8) return 0; return memcmp(p, sig, 8) == 0; } int main(int argc, char **argv){ const char *path = (argc >= 2) ? argv[1] : "flag_enc"; CUDA_CHECK(cudaMemcpyToSymbol(T_dev, T_host, 256)); size_t sz = 0; uint8_t *ct = read_file(path, &sz); if(!ct){ fprintf(stderr, "failed to read %s\n", path); return 1; } // pad to 256 size_t padded = (sz + 255) / 256 * 256; if(padded != sz){ fprintf(stderr, "[!] size not multiple of 256, pad to %zu (orig %zu)\n", padded, sz); ct = (uint8_t*)realloc(ct, padded); memset(ct + sz, 0, padded - sz); } size_t nblocks = padded / 256; // GPU keyless inverse once uint8_t *d = NULL; CUDA_CHECK(cudaMalloc((void**)&d, padded)); CUDA_CHECK(cudaMemcpy(d, ct, padded, cudaMemcpyHostToDevice)); keyless_inverse<<<(unsigned)nblocks, 256>>>(d); CUDA_CHECK(cudaGetLastError()); CUDA_CHECK(cudaDeviceSynchronize()); CUDA_CHECK(cudaMemcpy(ct, d, padded, cudaMemcpyDeviceToHost)); CUDA_CHECK(cudaFree(d)); // build Hinv uint8_t Hinv[256]; build_stage1_Hinv(Hinv); // brute force key uint8_t *tmp = (uint8_t*)malloc(padded); if(!tmp){ fprintf(stderr, "oom\n"); free(ct); return 1; } char flagbuf[512]; for(int k=0;k<256;k++){ memcpy(tmp, ct, padded); for(size_t b=0;b<nblocks;b++){ uint8_t block[256]; memcpy(block, tmp + b*256, 256); stage2_inv_block(block, (uint8_t)k); stage1_inv_block(block, b*256, (uint8_t)k, Hinv); memcpy(tmp + b*256, block, 256); } if(is_png_header(tmp, padded)){ printf("[+] found key = 0x%02X (%d)\n", k, k); if(write_file("flag_dec.png", tmp, sz)){ printf("[+] wrote decrypted PNG: flag_dec.png\n"); } else { fprintf(stderr, "failed to write flag_dec.png\n"); } free(tmp); free(ct); return 0; } } printf("[-] no flag found. maybe flag format differs.\n"); free(tmp); free(ct); return 0; } ~~~ ## 小结 10个月后拐回头看别有一番滋味在心头,短短大半年的时间,AI的发展已经能够到达秒ptx汇编并完成解密代码的地步,当初人工钻研了一整天才把逻辑全逆向出来,然后写加速解密代码废了特别大劲,印象很深当时甚至拿着写好的多线程代码去找其他人好的设备跑代码,结果现在ai写的解密代码跑一会就出来了,哎。是时候重新审视自己的目标和规划了。 虽然远远比不过AI了,但这十个月进步不小,当初汇编都看不懂还要靠ds分析,现在自己能很快的手动分析并还原伪代码了,很多思路都能很快对上了。 假如有线下断网做easy-cuda这样的题就好了,这种cuda加密我算基本学会了hhh 最后修改:2025 年 12 月 29 日 © 允许规范转载 打赏 赞赏作者 支付宝微信 赞 如果觉得我的文章对你有用,请随意赞赏