//-----------------------------------------------------------------------------
// Copyright (C) 2016, 2017 by piwi
//
// This code is licensed to you under the terms of the GNU GPL, version 2 or,
// at your option, any later version. See the LICENSE.txt file for the text of
// the license.
//-----------------------------------------------------------------------------
// Implements a card-only attack based on crypto text (encrypted nonces
// received during a nested authentication) only. Unlike other card-only
// attacks this doesn't rely on implementation errors but only on the
// inherent weaknesses of the crypto1 cipher. Described in
// Carlo Meijer, Roel Verdult, "Ciphertext-only Cryptanalysis on Hardened
// Mifare Classic Cards" in Proceedings of the 22nd ACM SIGSAC Conference on
// Computer and Communications Security, 2015
//-----------------------------------------------------------------------------
// Some helper functions which can benefit from SIMD instructions or other
// special instructions.
//

#include "hardnested_cpu_dispatch.h"
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#ifdef _MSC_VER
|
|
#include <intrin.h>
|
|
#endif
|
|
|
|
#ifdef X86_SIMD
// pointers to functions:
malloc_bitarray_t* malloc_bitarray_function_p = &malloc_bitarray_dispatch;
free_bitarray_t* free_bitarray_function_p = &free_bitarray_dispatch;
bitarray_AND_t* bitarray_AND_function_p = &bitarray_AND_dispatch;
count_bitarray_AND_t* count_bitarray_AND_function_p = &count_bitarray_AND_dispatch;
count_bitarray_low20_AND_t* count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_dispatch;
bitarray_AND4_t* bitarray_AND4_function_p = &bitarray_AND4_dispatch;
bitarray_OR_t* bitarray_OR_function_p = &bitarray_OR_dispatch;
count_bitarray_AND2_t* count_bitarray_AND2_function_p = &count_bitarray_AND2_dispatch;
count_bitarray_AND3_t* count_bitarray_AND3_function_p = &count_bitarray_AND3_dispatch;
count_bitarray_AND4_t* count_bitarray_AND4_function_p = &count_bitarray_AND4_dispatch;

crack_states_bitsliced_t* crack_states_bitsliced_function_p = &crack_states_bitsliced_dispatch;
bitslice_test_nonces_t* bitslice_test_nonces_function_p = &bitslice_test_nonces_dispatch;

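// Each pointer above initially targets the corresponding *_dispatch wrapper
// defined below. On the first call the wrapper detects the CPU's SIMD level
// and rebinds the pointer to the fastest available implementation, so all
// later calls through the pointer skip the detection entirely.
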
SIMDExecInstr GetSIMDInstr() {
    SIMDExecInstr instr = SIMD_NONE;
#ifdef _MSC_VER
    int cpuid[4];
    int cpuid7[4] = {0, 0, 0, 0};
    __cpuid(cpuid, 0);
    if (cpuid[0] >= 7) {
        __cpuidex(cpuid7, 7, 0);                        // structured extended feature flags
    }
    __cpuid(cpuid, 1);                                  // standard feature flags
    if (cpuid7[1] >> 16 & 1) instr = SIMD_AVX512;       // AVX-512F: leaf 7, EBX bit 16
    else if (cpuid7[1] >> 5 & 1) instr = SIMD_AVX2;     // AVX2:     leaf 7, EBX bit 5
    else if (cpuid[2] >> 28 & 1) instr = SIMD_AVX;      // AVX:      leaf 1, ECX bit 28
    else if (cpuid[3] >> 26 & 1) instr = SIMD_SSE2;     // SSE2:     leaf 1, EDX bit 26
#else
    if (__builtin_cpu_supports("avx512f")) instr = SIMD_AVX512;
    else if (__builtin_cpu_supports("avx2")) instr = SIMD_AVX2;
    else if (__builtin_cpu_supports("avx")) instr = SIMD_AVX;
    else if (__builtin_cpu_supports("sse2")) instr = SIMD_SSE2;
#endif
    return instr;
}

static void NoCpu() {
    printf("\nThis program requires at least an SSE2 capable CPU. Exiting...\n");
    exit(4);
}

// determine the available instruction set at runtime and call the correct function

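// Every dispatch function below follows the same pattern: query GetSIMDInstr(),
// point the corresponding function pointer at the matching SSE2/AVX/AVX2/AVX512
// implementation (or abort via NoCpu() if none is available), then forward the
// current call through that pointer.
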
uint32_t* malloc_bitarray_dispatch(uint32_t x) {
    switch (GetSIMDInstr()) {
        case SIMD_AVX512:
            malloc_bitarray_function_p = &malloc_bitarray_AVX512;
            break;
        case SIMD_AVX2:
            malloc_bitarray_function_p = &malloc_bitarray_AVX2;
            break;
        case SIMD_AVX:
            malloc_bitarray_function_p = &malloc_bitarray_AVX;
            break;
        case SIMD_SSE2:
            malloc_bitarray_function_p = &malloc_bitarray_SSE2;
            break;
        default:
            NoCpu();
    }

    // call the most optimized function for this CPU
    return (*malloc_bitarray_function_p)(x);
}

void free_bitarray_dispatch(uint32_t* x) {
    switch (GetSIMDInstr()) {
        case SIMD_AVX512:
            free_bitarray_function_p = &free_bitarray_AVX512;
            break;
        case SIMD_AVX2:
            free_bitarray_function_p = &free_bitarray_AVX2;
            break;
        case SIMD_AVX:
            free_bitarray_function_p = &free_bitarray_AVX;
            break;
        case SIMD_SSE2:
            free_bitarray_function_p = &free_bitarray_SSE2;
            break;
        default:
            NoCpu();
    }

    // call the most optimized function for this CPU
    (*free_bitarray_function_p)(x);
}

void bitarray_AND_dispatch(uint32_t* A, uint32_t* B) {
    switch (GetSIMDInstr()) {
        case SIMD_AVX512:
            bitarray_AND_function_p = &bitarray_AND_AVX512;
            break;
        case SIMD_AVX2:
            bitarray_AND_function_p = &bitarray_AND_AVX2;
            break;
        case SIMD_AVX:
            bitarray_AND_function_p = &bitarray_AND_AVX;
            break;
        case SIMD_SSE2:
            bitarray_AND_function_p = &bitarray_AND_SSE2;
            break;
        default:
            NoCpu();
    }

    // call the most optimized function for this CPU
    (*bitarray_AND_function_p)(A, B);
}

uint32_t count_bitarray_AND_dispatch(uint32_t* A, uint32_t* B) {
    switch (GetSIMDInstr()) {
        case SIMD_AVX512:
            count_bitarray_AND_function_p = &count_bitarray_AND_AVX512;
            break;
        case SIMD_AVX2:
            count_bitarray_AND_function_p = &count_bitarray_AND_AVX2;
            break;
        case SIMD_AVX:
            count_bitarray_AND_function_p = &count_bitarray_AND_AVX;
            break;
        case SIMD_SSE2:
            count_bitarray_AND_function_p = &count_bitarray_AND_SSE2;
            break;
        default:
            NoCpu();
    }

    // call the most optimized function for this CPU
    return (*count_bitarray_AND_function_p)(A, B);
}

uint32_t count_bitarray_low20_AND_dispatch(uint32_t* A, uint32_t* B) {
    switch (GetSIMDInstr()) {
        case SIMD_AVX512:
            count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX512;
            break;
        case SIMD_AVX2:
            count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX2;
            break;
        case SIMD_AVX:
            count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX;
            break;
        case SIMD_SSE2:
            count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_SSE2;
            break;
        default:
            NoCpu();
    }

    // call the most optimized function for this CPU
    return (*count_bitarray_low20_AND_function_p)(A, B);
}

void bitarray_AND4_dispatch(uint32_t* A, uint32_t* B, uint32_t* C, uint32_t* D) {
    switch (GetSIMDInstr()) {
        case SIMD_AVX512:
            bitarray_AND4_function_p = &bitarray_AND4_AVX512;
            break;
        case SIMD_AVX2:
            bitarray_AND4_function_p = &bitarray_AND4_AVX2;
            break;
        case SIMD_AVX:
            bitarray_AND4_function_p = &bitarray_AND4_AVX;
            break;
        case SIMD_SSE2:
            bitarray_AND4_function_p = &bitarray_AND4_SSE2;
            break;
        default:
            NoCpu();
    }

    // call the most optimized function for this CPU
    (*bitarray_AND4_function_p)(A, B, C, D);
}

void bitarray_OR_dispatch(uint32_t* A, uint32_t* B) {
    switch (GetSIMDInstr()) {
        case SIMD_AVX512:
            bitarray_OR_function_p = &bitarray_OR_AVX512;
            break;
        case SIMD_AVX2:
            bitarray_OR_function_p = &bitarray_OR_AVX2;
            break;
        case SIMD_AVX:
            bitarray_OR_function_p = &bitarray_OR_AVX;
            break;
        case SIMD_SSE2:
            bitarray_OR_function_p = &bitarray_OR_SSE2;
            break;
        default:
            NoCpu();
    }

    // call the most optimized function for this CPU
    (*bitarray_OR_function_p)(A, B);
}

uint32_t count_bitarray_AND2_dispatch(uint32_t* A, uint32_t* B) {
    switch (GetSIMDInstr()) {
        case SIMD_AVX512:
            count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX512;
            break;
        case SIMD_AVX2:
            count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX2;
            break;
        case SIMD_AVX:
            count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX;
            break;
        case SIMD_SSE2:
            count_bitarray_AND2_function_p = &count_bitarray_AND2_SSE2;
            break;
        default:
            NoCpu();
    }

    // call the most optimized function for this CPU
    return (*count_bitarray_AND2_function_p)(A, B);
}

uint32_t count_bitarray_AND3_dispatch(uint32_t* A, uint32_t* B, uint32_t* C) {
    switch (GetSIMDInstr()) {
        case SIMD_AVX512:
            count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX512;
            break;
        case SIMD_AVX2:
            count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX2;
            break;
        case SIMD_AVX:
            count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX;
            break;
        case SIMD_SSE2:
            count_bitarray_AND3_function_p = &count_bitarray_AND3_SSE2;
            break;
        default:
            NoCpu();
    }

    // call the most optimized function for this CPU
    return (*count_bitarray_AND3_function_p)(A, B, C);
}

uint32_t count_bitarray_AND4_dispatch(uint32_t* A, uint32_t* B, uint32_t* C, uint32_t* D) {
    switch (GetSIMDInstr()) {
        case SIMD_AVX512:
            count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX512;
            break;
        case SIMD_AVX2:
            count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX2;
            break;
        case SIMD_AVX:
            count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX;
            break;
        case SIMD_SSE2:
            count_bitarray_AND4_function_p = &count_bitarray_AND4_SSE2;
            break;
        default:
            NoCpu();
    }

    // call the most optimized function for this CPU
    return (*count_bitarray_AND4_function_p)(A, B, C, D);
}

uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t* best_first_bytes, statelist_t* p, uint32_t* keys_found, uint64_t* num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t* bf_test_nonce_2nd_byte, noncelist_t* nonces) {
    switch (GetSIMDInstr()) {
        case SIMD_AVX512:
            crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX512;
            break;
        case SIMD_AVX2:
            crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX2;
            break;
        case SIMD_AVX:
            crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX;
            break;
        case SIMD_SSE2:
            crack_states_bitsliced_function_p = &crack_states_bitsliced_SSE2;
            break;
        default:
            NoCpu();
    }

    // call the most optimized function for this CPU
    return (*crack_states_bitsliced_function_p)(cuid, best_first_bytes, p, keys_found, num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, nonces);
}

void bitslice_test_nonces_dispatch(uint32_t nonces_to_bruteforce, uint32_t* bf_test_nonce, uint8_t* bf_test_nonce_par) {
    switch (GetSIMDInstr()) {
        case SIMD_AVX512:
            bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX512;
            break;
        case SIMD_AVX2:
            bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX2;
            break;
        case SIMD_AVX:
            bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX;
            break;
        case SIMD_SSE2:
            bitslice_test_nonces_function_p = &bitslice_test_nonces_SSE2;
            break;
        default:
            NoCpu();
    }

    // call the most optimized function for this CPU
    (*bitslice_test_nonces_function_p)(nonces_to_bruteforce, bf_test_nonce, bf_test_nonce_par);
}
#else

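// Without X86_SIMD the choice is made at compile time: the pointers are bound
// directly to the portable NOSIMD implementations and no runtime CPU
// detection takes place.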
malloc_bitarray_t* malloc_bitarray_function_p = &malloc_bitarray_NOSIMD;
free_bitarray_t* free_bitarray_function_p = &free_bitarray_NOSIMD;
bitarray_AND_t* bitarray_AND_function_p = &bitarray_AND_NOSIMD;
count_bitarray_AND_t* count_bitarray_AND_function_p = &count_bitarray_AND_NOSIMD;
count_bitarray_low20_AND_t* count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_NOSIMD;
bitarray_AND4_t* bitarray_AND4_function_p = &bitarray_AND4_NOSIMD;
bitarray_OR_t* bitarray_OR_function_p = &bitarray_OR_NOSIMD;
count_bitarray_AND2_t* count_bitarray_AND2_function_p = &count_bitarray_AND2_NOSIMD;
count_bitarray_AND3_t* count_bitarray_AND3_function_p = &count_bitarray_AND3_NOSIMD;
count_bitarray_AND4_t* count_bitarray_AND4_function_p = &count_bitarray_AND4_NOSIMD;

crack_states_bitsliced_t* crack_states_bitsliced_function_p = &crack_states_bitsliced_NOSIMD;
bitslice_test_nonces_t* bitslice_test_nonces_function_p = &bitslice_test_nonces_NOSIMD;
#endif

/////////////////////////////////////////////////
// Entries to dispatched function calls

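// These wrappers are the entry points used by the rest of the hardnested
// attack code; each simply forwards through the function pointer selected
// above. A minimal usage sketch (hypothetical caller; the argument passed to
// malloc_bitarray() is a placeholder and not taken from this file):
//
//     uint32_t *a = malloc_bitarray(sizeof(uint32_t) * (1 << 19));
//     uint32_t *b = malloc_bitarray(sizeof(uint32_t) * (1 << 19));
//     // ... fill a and b with candidate-state bitmasks ...
//     uint32_t overlap = count_bitarray_AND(a, b);  // first call resolves the SIMD variant
//     free_bitarray(a);
//     free_bitarray(b);
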
inline uint32_t* malloc_bitarray(uint32_t x) {
    return (*malloc_bitarray_function_p)(x);
}

inline void free_bitarray(uint32_t* x) {
    (*free_bitarray_function_p)(x);
}

inline void bitarray_AND(uint32_t* A, uint32_t* B) {
    (*bitarray_AND_function_p)(A, B);
}

inline uint32_t count_bitarray_AND(uint32_t* A, uint32_t* B) {
    return (*count_bitarray_AND_function_p)(A, B);
}

inline uint32_t count_bitarray_low20_AND(uint32_t* A, uint32_t* B) {
    return (*count_bitarray_low20_AND_function_p)(A, B);
}

inline void bitarray_AND4(uint32_t* A, uint32_t* B, uint32_t* C, uint32_t* D) {
    (*bitarray_AND4_function_p)(A, B, C, D);
}

inline void bitarray_OR(uint32_t* A, uint32_t* B) {
    (*bitarray_OR_function_p)(A, B);
}

inline uint32_t count_bitarray_AND2(uint32_t* A, uint32_t* B) {
    return (*count_bitarray_AND2_function_p)(A, B);
}

inline uint32_t count_bitarray_AND3(uint32_t* A, uint32_t* B, uint32_t* C) {
    return (*count_bitarray_AND3_function_p)(A, B, C);
}

inline uint32_t count_bitarray_AND4(uint32_t* A, uint32_t* B, uint32_t* C, uint32_t* D) {
    return (*count_bitarray_AND4_function_p)(A, B, C, D);
}

uint64_t crack_states_bitsliced(uint32_t cuid, uint8_t* best_first_bytes, statelist_t* p, uint32_t* keys_found, uint64_t* num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t* bf_test_nonce_2nd_byte, noncelist_t* nonces) {
    return (*crack_states_bitsliced_function_p)(cuid, best_first_bytes, p, keys_found, num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, nonces);
}

void bitslice_test_nonces(uint32_t nonces_to_bruteforce, uint32_t* bf_test_nonce, uint8_t* bf_test_nonce_par) {
    (*bitslice_test_nonces_function_p)(nonces_to_bruteforce, bf_test_nonce, bf_test_nonce_par);
}