memset效率分析

xiaoxiao2022-07-07 228

glibc库的memset和自写的memset效率对比

glibc中memset（通用C版本）的实现思路如下：

#define OPSIZ (sizeof(unsigned long int))

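1、长度len小于8的时候，逐字节（一次1个字节）进行赋值。 2、长度len大于或者等于8的时候，先逐字节写入，直到目标地址按OPSIZ对齐；然后以OPSIZ为单位写入（32位机器上OPSIZ为4字节，64位Linux上为8字节），每次循环连续写8个OPSIZ；剩下不足8个OPSIZ的部分按每次OPSIZ字节写入，最后不足OPSIZ的部分按每次1字节写入。即：len = a + 8 * OPSIZ * x + OPSIZ * y + 1 * z，其中a为对齐前逐字节写入的字节数（0 ≤ a < OPSIZ），0 ≤ y < 8，0 ≤ z < OPSIZ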

代码如下：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <fcntl.h>
#include <sys/time.h>

/* Machine word used for the bulk stores; OPSIZ is its size in bytes. */
typedef unsigned long int op_t;
#define OPSIZ (sizeof(op_t))

typedef unsigned char byte;

/* Size of the benchmark buffer and the byte value written into it. */
enum { BUF_LEN = 1000000, FILL_BYTE = 244 };

/*
 * Word-at-a-time memset, following the generic C implementation in glibc.
 *
 * Fills the first `len` bytes at `dstpp` with the value (unsigned char)c and
 * returns `dstpp`.
 *
 * Strategy for len >= 8:
 *   1. store single bytes until the destination is OPSIZ-aligned,
 *   2. store 8 words per loop iteration while at least 8 words remain,
 *   3. store one word per iteration while at least one word remains,
 *   4. store the remaining tail one byte at a time.
 * Shorter requests go straight to step 4.
 */
void *lib_memset(void *dstpp, int c, size_t len)
{
    /* uintptr_t is the integer type meant for pointer round-trips; a signed
       long may be narrower than a pointer on some ABIs. */
    uintptr_t dstp = (uintptr_t)dstpp;
    const byte fill = (byte)c;

    if (len >= 8) {
        /* Replicate the fill byte into every byte of a word,
           e.g. 0xF4 -> 0xF4F4 -> 0xF4F4F4F4 -> 0xF4F4F4F4F4F4F4F4. */
        op_t cccc = fill;
        cccc |= cccc << 8;
        cccc |= cccc << 16;
        if (OPSIZ > 4) {
            /* Shift in two steps to avoid a warning when long has 32 bits. */
            cccc |= (cccc << 16) << 16;
        }

        /* Align the destination. At most OPSIZ - 1 bytes are written here,
           so with len >= 8 there is no need to test for len == 0. */
        while (dstp % OPSIZ != 0) {
            *(byte *)dstp = fill;
            dstp += 1;
            len -= 1;
        }

        /* Write 8 words per iteration until fewer than 8 words remain. */
        for (size_t blocks = len / (OPSIZ * 8); blocks > 0; blocks--) {
            op_t *w = (op_t *)dstp;
            w[0] = cccc;
            w[1] = cccc;
            w[2] = cccc;
            w[3] = cccc;
            w[4] = cccc;
            w[5] = cccc;
            w[6] = cccc;
            w[7] = cccc;
            dstp += 8 * OPSIZ;
        }
        len %= OPSIZ * 8;

        /* Write one word per iteration until fewer than OPSIZ bytes remain. */
        for (size_t words = len / OPSIZ; words > 0; words--) {
            *(op_t *)dstp = cccc;
            dstp += OPSIZ;
        }
        len %= OPSIZ;
    }

    /* Write the last few bytes (or everything, when len < 8). */
    while (len > 0) {
        *(byte *)dstp = fill;
        dstp += 1;
        len -= 1;
    }

    return dstpp;
}

/*
 * Naive byte-at-a-time memset used as the baseline.
 *
 * Fills the first `n` bytes at `s` with (unsigned char)c and returns `s`.
 */
void *zt_memset(void *s, int c, size_t n)
{
    /* unsigned char is one byte, so only the low 8 bits of c are kept. */
    const unsigned char uc = (unsigned char)c;
    unsigned char *su = s;

    for (; n > 0; ++su, --n) {
        *su = uc;
    }
    return s;
}

/*
 * Times zt_memset and lib_memset on the same BUF_LEN-byte buffer and prints,
 * for each, the function name, source line, and the elapsed time split into
 * whole seconds and remaining microseconds.
 */
int main(void)
{
    struct timeval start;
    struct timeval end;
    long long us;

    unsigned char *buff = malloc(BUF_LEN);
    if (buff == NULL) {
        perror("malloc");
        return EXIT_FAILURE;
    }

    /* Touch every page once before timing so that the first measured routine
       does not also pay for the initial page faults of a fresh allocation. */
    zt_memset(buff, 0, BUF_LEN);

    gettimeofday(&start, NULL);
    zt_memset(buff, FILL_BYTE, BUF_LEN);
    gettimeofday(&end, NULL);
    /* Combine both fields first: the raw tv_usec difference alone is negative
       whenever the interval crosses a second boundary. */
    us = (long long)(end.tv_sec - start.tv_sec) * 1000000LL
       + (long long)(end.tv_usec - start.tv_usec);
    printf("%s %d %lld %lld\n", __func__, __LINE__, us / 1000000LL, us % 1000000LL);

    gettimeofday(&start, NULL);
    lib_memset(buff, FILL_BYTE, BUF_LEN);
    gettimeofday(&end, NULL);
    us = (long long)(end.tv_sec - start.tv_sec) * 1000000LL
       + (long long)(end.tv_usec - start.tv_usec);
    printf("%s %d %lld %lld\n", __func__, __LINE__, us / 1000000LL, us % 1000000LL);

    free(buff);
    return 0;
}

实验结果为：

从结果得知：glibc的实现效率要比自己的实现更高，原因为：自己写的zt_memset是一个字节一个字节写入的，而lib_memset在目标地址对齐之后，按机器字长（sizeof(unsigned long int)，64位Linux下为8字节）逐字写入，并且每次循环展开为连续8次写入，因此循环次数和写内存的指令数都比zt_memset少得多

最新回复(0)