strchr函数的介绍及用法
如果要写一个从字符串中查找一个字符的函数,相信你不难想到如下代码:
char* __cdecl strchr_1( char const* _Str, int _Val) { while (*_Str && *_Str != _Val) _Str++; if (*_Str == _Val) return(_Str); return NULL;}
和 strlen 的效率之旅一样,我们先做好测试脚手架:
typedef char* (__cdecl* p_strchr)( char const* _Str, int _Val); // 测试正确性void test_function( char* funName, p_strchr function, char* str, char ch, int expectIndex) { int got = function(str, ch) - (int)str; printf( "func [%s] test value [%s] find char [%c]," " expect [%d], got [%d], %s\n", funName, str, ch, got, expectIndex, got == expectIndex ? "true" : "false" );} // 正确性测试用例void test_functions(char* funName, p_strchr function) { struct test_item { char* str; char val; int expect_index; } items[] = { // return NULL, expect nagtive address of "a" { "a", 'b', -(int)"a" }, // last is '\0', returns 1 { "a", '\0', 1 }, { "ab", 'a', 0 }, { "ab", 'b', 1 }, {"abc", 'b', 1 } }; int size = sizeof(items) / sizeof(struct test_item); for (int i = 0; i < size; i++) { test_function( funName, function, items[i].str, items[i].val, items[i].expect_index); }}
放入main函数执行,执行结果如下:
可以看到,我们获取到了我们想要的效果,那么接下来再来一个效率对比,我们的测试脚手架如下:
#define BUFF_SIZE 100000000char buff[BUFF_SIZE + 1]; // 100M + 1BYTEvoid test_function_prof( char* funName, p_strchr function, char* str, char ch) { ULONGLONG start = 0; ULONGLONG end = 0; start = GetTickCount64(); function(str, ch); end = GetTickCount64(); printf( "test func [%s] start [%lld], end [%lld], cost: [%lld]\n", funName, start, end, end - start );} void test_profs() { // init int size = BUFF_SIZE; for (int i = 0; i < size; i++) { buff[i] = 'a'; } buff[BUFF_SIZE - 1] = 'b'; buff[BUFF_SIZE] = '\0'; test_function_prof("strchar_1", strchr_1, buff, 'b');}
在主函数中调用 test_profs 函数,得到结果如下:
一个100M长的字符串,查找到结尾需要 156ms,那么系统自带的 strchr 函数表现如何呢?向 test_profs 函数添加如下代码:
test_function_prof("strchar", strchr, buff, 'b');
得到结果如下:
哇,差距挺大的,居然差了8.75(140/16)倍,那么效率何来?我们到源码中找答案,找到系统 strchr 函数的实现,我们获取到如下代码:
page ,132 title strchr - search string for given character;***;strchr.asm - search a string for a given character;; Copyright (c) Microsoft Corporation. All rights reserved.;;Purpose:; defines strchr() - search a string for a character;;******************************************************************************* .xlist include vcruntime.inc .list page;***;char *strchr(string, chr) - search a string for a character;;Purpose:; Searches a string for a given character, which may be the; null character '\0'.;; Algorithm:; char *; strchr (string, chr); char *string, chr;; {; while (*string && *string != chr); string++;; if (*string == chr); return(string);; return((char *)0);; };;Entry:; char *string - string to search in; char chr - character to search for;;Exit:; returns pointer to the first occurence of c in string; returns NULL if chr does not occur in string;;Uses:;;Exceptions:;;******************************************************************************* CODESEG align 16 public strchr, __from_strstr_to_strchrstrchr proc \ string:ptr byte, \ chr:byte OPTION PROLOGUE:NONE, EPILOGUE:NONE .FPO ( 0, 2, 0, 0, 0, 0 ) ; Include SSE2 code path for platforms that support it. include strchr_sse.inc xor eax,eax mov al,[esp + 8] ; al = chr (search char) __from_strstr_to_strchr label proc push ebx ; pSERVE EBX mov ebx,eax ; ebx = 0/0/0/chr shl eax,8 ; eax = 0/0/chr/0 mov edx,[esp + 8] ; edx = buffer test edx,3 ; test if string is aligned on 32 bits jz short main_loop_start str_misaligned: ; simple byte loop until string is aligned mov cl,[edx] add edx,1 cmp cl,bl je short found_bx test cl,cl jz short retnull_bx test edx,3 ; now aligned ? jne short str_misaligned main_loop_start: ; set all 4 bytes of ebx to [chr] or ebx,eax ; ebx = 0/0/chr/chr push edi ; pSERVE EDI mov eax,ebx ; eax = 0/0/chr/chr shl ebx,10h ; ebx = chr/chr/0/0 push esi ; pSERVE ESI or ebx,eax ; ebx = all 4 bytes = [chr] ; in the main loop (below), we are looking for chr or for EOS (end of string) main_loop: mov ecx,[edx] ; read dword (4 bytes) mov edi,7efefeffh ; work with edi & ecx for looking for chr mov eax,ecx ; eax = dword mov esi,edi ; work with esi & eax for looking for EOS xor ecx,ebx ; eax = dword xor chr/chr/chr/chr add esi,eax add edi,ecx xor ecx,-1 xor eax,-1 xor ecx,edi xor eax,esi add edx,4 and ecx,81010100h ; test for chr jnz short chr_is_found ; chr probably has been found ; chr was not found, check for EOS and eax,81010100h ; is any flag set ?? jz short main_loop ; EOS was not found, go get another dword and eax,01010100h ; is it in high byte? jnz short retnull ; no, definitely found EOS, return failure and esi,80000000h ; check was high byte 0 or 80h jnz short main_loop ; it just was 80h in high byte, go get ; another dwordretnull: pop esi pop ediretnull_bx: pop ebx xor eax,eax ret ; _cdecl return found_bx: lea eax,[edx - 1] pop ebx ; restore ebx ret ; _cdecl return chr_is_found: mov eax,[edx - 4] ; let's look one more time on this dword cmp al,bl ; is chr in byte 0? je short byte_0 test al,al ; test if low byte is 0 je retnull cmp ah,bl ; is it byte 1 je short byte_1 test ah,ah ; found EOS ? je retnull shr eax,10h ; is it byte 2 cmp al,bl je short byte_2 test al,al ; if in al some bits were set, bl!=bh je retnull cmp ah,bl je short byte_3 test ah,ah jz retnull jmp short main_loop ; neither chr nor EOS found, go get ; another dwordbyte_3: pop esi pop edi lea eax,[edx - 1] pop ebx ; restore ebx ret ; _cdecl return byte_2: lea eax,[edx - 2] pop esi pop edi pop ebx ret ; _cdecl return byte_1: lea eax,[edx - 3] pop esi pop edi pop ebx ret ; _cdecl return byte_0: lea eax,[edx - 4] pop esi ; restore esi pop edi ; restore edi pop ebx ; restore ebx ret ; _cdecl return strchr endp end
其中,
78-86行进行地址32位对齐(硬件访问32位对齐地址更快);
88-94行我们将ebx(32位4字节)中的四个字节均设置为要查找的字符;
98-129行是对字符串的迭代查找目标字符;
在循环中,我们看到了几个特殊的值,分别列出值以及其二进制位如下:
0x7efefeff -> 0b 1111 1110 1111 1110 1111 1110 1111 11110x81010100 -> 0b 1000 0001 0000 0001 0000 0001 0000 00000x01010100 -> 0b 0000 0001 0000 0001 0000 0001 0000 00000x80000000 -> 0b 1000 0000 0000 0000 0000 0000 0000 0000-1 -> 0b 1111 1111 1111 1111 1111 1111 1111 1111
我们先看下对 ecx 寄存器的操作:
1、读取查询字符串的四个字节(这里因为是 ASCII 字符,所以是四个字符);
2、与 ebx(chr/chr/chr/chr) 异或;
3、按位取反(xor ecx, -1);
4、edi = 0x7efefeff + ecx
5、异或edi;
6、按位与 0x81010100;
7、跳转到依次判断各个字节是否位目标字符的逻辑;
从以上的逻辑,我们可以看到,117行判断决定了是否有可能找到了目标字符。那么是怎么做到的呢?
首先,我们假设 ecx 所有字节均不是目标值,则执行 xor ecx,ebx 之后,所有字节均不为0;
第二步,按位取反,则 ecx 所有字节均不为 0;
第三步,edi = 0x7efefeff + ecx,这一步参考 strlen 的效率之旅 中对 '\0' 的判断;
第四步,判断 ecx & 81010100 的值,如果结果不为0,则可能找到了对应字符,跳转到字节判断逻辑(chr_is_found);
第五步,如果走到了122行,则说明肯定没有找到目标,这里判断是否到了查找字符串结尾,逻辑和 strlen 的效率之旅 中判断字符串结尾逻辑相同,每次读取并处理了四个字节。
那么就不难理解,效率肯定比之前单字节匹配的速度更快,但......为什么是 8 倍而不是 4 倍呢?
仔细看代码之后,发现有这么一句:
; Include SSE2 code path for platforms that support it. include strchr_sse.inc
由于没有找到对应源代码,所以这里我们通过反汇编的方式,获取到strchr函数真正执行的代码来看,反汇编结果如下:
cmp dword ptr ds:[7BEB92DCh],1 ; 7BEA42E0 jb 7BEA4348 ; 7BEA42E7 movzx eax,byte ptr [esp+8] ; 7BEA42E9 mov edx,eax ; 7BEA42EE shl eax,8 ; 7BEA42F0 or edx,eax ; 7BEA42F3 movd xmm3,edx ; 7BEA42F5 pshuflw xmm3,xmm3,0 ; 7BEA42F9 movlhps xmm3,xmm3 ; 7BEA42FE mov edx,dword ptr [esp+4] ; 7BEA4301 mov ecx,0Fh ; 7BEA4305 or eax,0FFFFFFFFh ; 7BEA430A and ecx,edx ; 7BEA430D shl eax,cl ; 7BEA430F sub edx,ecx ; 7BEA4311 movdqu xmm1,xmmword ptr [edx] ; 7BEA4313 pxor xmm2,xmm2 ; 7BEA4317 pcmpeqb xmm2,xmm1 ; 7BEA431B pcmpeqb xmm1,xmm3 ; 7BEA431F por xmm2,xmm1 ; 7BEA4323 pmovmskb ecx,xmm2 ; 7BEA4327 and ecx,eax ; 7BEA432B jne 7BEA4337 ; 7BEA432D or eax,0FFFFFFFFh ; 7BEA432F add edx,10h ; 7BEA4332 jmp 7BEA4313 ; 7BEA4335 bsf eax,ecx ; 7BEA4337 add eax,edx ; 7BEA433A movd edx,xmm3 ; 7BEA433C xor ecx,ecx ; 7BEA4340 cmp dl,byte ptr [eax] ; 7BEA4342 cmovne eax,ecx ; 7BEA4344 ret ; 7BEA4347 xor eax,eax ; 7BEA4348 mov al,byte ptr [esp+8] ; 7BEA434A push ebx ; 7BEA434E mov ebx,eax ; 7BEA434F shl eax,8 ; 7BEA4351 mov edx,dword ptr [esp+8] ; 7BEA4354 test edx,3 ; 7BEA4358 je 7BEA4375 ; 7BEA435E mov cl,byte ptr [edx] ; 7BEA4360 add edx,1 ; 7BEA4362 cmp cl,bl ; 7BEA4365 je 7BEA43C2 ; 7BEA4367 test cl,cl ; 7BEA4369 je 7BEA43BE ; 7BEA436B test edx,3 ; 7BEA436D jne 7BEA4360 ; 7BEA4373 or ebx,eax ; 7BEA4375 push edi ; 7BEA4377 mov eax,ebx ; 7BEA4378 shl ebx,10h ; 7BEA437A push esi ; 7BEA437D or ebx,eax ; 7BEA437E mov ecx,dword ptr [edx] ; 7BEA4380 mov edi,7EFEFEFFh ; 7BEA4382 mov eax,ecx ; 7BEA4387 mov esi,edi ; 7BEA4389 xor ecx,ebx ; 7BEA438B add esi,eax ; 7BEA438D add edi,ecx ; 7BEA438F xor ecx,0FFFFFFFFh ; 7BEA4391 xor eax,0FFFFFFFFh ; 7BEA4394 xor ecx,edi ; 7BEA4397 xor eax,esi ; 7BEA4399 add edx,4 ; 7BEA439B and ecx,81010100h ; 7BEA439E jne 7BEA43C7 ; 7BEA43A4 and eax,81010100h ; 7BEA43A6 je 7BEA4380 ; 7BEA43AB and eax,1010100h ; 7BEA43AD jne 7BEA43BC ; 7BEA43B2 and esi,80000000h ; 7BEA43B4 jne 7BEA4380 ; 7BEA43BA pop esi ; 7BEA43BC pop edi ; 7BEA43BD pop ebx ; 7BEA43BE xor eax,eax ; 7BEA43BF ret ; 7BEA43C1 lea eax,[edx-1] ; 7BEA43C2 pop ebx ; 7BEA43C5 ret ; 7BEA43C6 mov eax,dword ptr [edx-4] ; 7BEA43C7 cmp al,bl ; 7BEA43CA je 7BEA4404 ; 7BEA43CC test al,al ; 7BEA43CE je 7BEA43BC ; 7BEA43D0 cmp ah,bl ; 7BEA43D2 je 7BEA43FD ; 7BEA43D4 test ah,ah ; 7BEA43D6 je 7BEA43BC ; 7BEA43D8 shr eax,10h ; 7BEA43DA cmp al,bl ; 7BEA43DD je 7BEA43F6 ; 7BEA43DF test al,al ; 7BEA43E1 je 7BEA43BC ; 7BEA43E3 cmp ah,bl ; 7BEA43E5 je 7BEA43EF ; 7BEA43E7 test ah,ah ; 7BEA43E9 je 7BEA43BC ; 7BEA43EB jmp 7BEA4380 ; 7BEA43ED pop esi ; 7BEA43EF pop edi ; 7BEA43F0 lea eax,[edx-1] ; 7BEA43F1 pop ebx ; 7BEA43F4 ret ; 7BEA43F5 lea eax,[edx-2] ; 7BEA43F6 pop esi ; 7BEA43F9 pop edi ; 7BEA43FA pop ebx ; 7BEA43FB ret ; 7BEA43FC lea eax,[edx-3] ; 7BEA43FD pop esi ; 7BEA4400 pop edi ; 7BEA4401 pop ebx ; 7BEA4402 ret ; 7BEA4403 lea eax,[edx-4] ; 7BEA4404 pop esi ; 7BEA4407 pop edi ; 7BEA4408 pop ebx ; 7BEA4409 ret ; 7BEA440A
本文地址:百科问答频道 https://www.neebe.cn/wenda/903427.html,易企推百科一个免费的知识分享平台,本站部分文章来网络分享,本着互联网分享的精神,如有涉及到您的权益,请联系我们删除,谢谢!