搬:Linux 下 C语言大文件读写(大于4G)

    xiaoxiao2025-10-12  5

    Linux 下 C语言大文件读写(大于4G)

    2016-02-18 3300

    因为工作关系,需要从大文件中筛选数据,进行比对。文件大小一般都在6g左右。

    读取大文件有如下两种方法:一是用fopen打开文件,用fgets循环逐行读取,再用fclose关闭文件;二是用open函数打开文件,用lseek获取文件大小,用mmap对大文件做内存映射,用munmap解除内存映射,最后用close关闭文件句柄。方式一较慢,就不再详细描述,下面主要描述方式二。

    方式二,网上介绍也有很多,但是鲜有介绍文件大于4G时的读取方法。在long为32位的平台上,用(无符号)long型保存文件大小时,最多只能表示4294967295个字节,也就是4G。解决方法是用long long来保存文件的大小。

    样例代码如下:

    #include<stdio.h> #include<string.h> #include<stdlib.h> #include <fcntl.h> #include <sys/stat.h> #include <sys/time.h> #include <sys/mman.h> #include <sys/types.h> #include <errno.h> #include <unistd.h> using namespace std;

    #include "dlist.h"

    /*
     * Call-flow record: one entry per call seen in the log.
     *
     * identifier - call identifier token (NUL-terminated, at most 99 chars)
     * billingid  - billing ID token (NUL-terminated, at most 199 chars)
     * response   - true once a matching response has been recorded
     *
     * The description above used to be a `//` comment sharing a line with the
     * typedef, which commented the whole declaration out; it now lives in its
     * own block comment so the struct is actually declared.
     */
    typedef struct s_callflow
    {
        char identifier[100];
        char billingid[200];
        bool response;

        /* Default state: both strings empty, no response seen yet. */
        s_callflow()
        {
            identifier[0] = '\0';
            billingid[0] = '\0';
            response = false;
        }
    } callflow;

    /*
     * Allocate a call-flow record on the heap and fill it in.
     *
     * identifier - call identifier; NULL is treated as an empty string
     * billingid  - billing ID; NULL is treated as an empty string
     * response   - initial value of the record's response flag
     *
     * Returns the new record, or NULL when malloc fails. The caller owns the
     * record and releases it with free().
     *
     * Inputs longer than the destination field are truncated instead of
     * overflowing the heap block; both fields are always NUL-terminated.
     * The record comes from malloc, so the struct constructor does not run;
     * every field is assigned explicitly here.
     */
    callflow *call_instance(char *identifier, char *billingid, bool response)
    {
        callflow *call_ptr = (callflow *)malloc(sizeof *call_ptr);
        if (call_ptr == NULL)
        {
            return NULL;
        }

        /* snprintf bounds the copy and always terminates, unlike strcpy. */
        snprintf(call_ptr->identifier, sizeof call_ptr->identifier, "%s",
                 identifier != NULL ? identifier : "");
        snprintf(call_ptr->billingid, sizeof call_ptr->billingid, "%s",
                 billingid != NULL ? billingid : "");
        call_ptr->response = response;

        return call_ptr;
    }

    void updatecall(DList *list,char *identifier) {     if(list==NULL) return;     DListElmt *new_element;     new_element = list->tail;        while(new_element!=NULL)     {         callflow * flow = (callflow *)new_element->data;         if(flow!=NULL)         {             if(strcmp(flow->identifier,identifier)==0 && flow->response==false)             {                 flow->response=true;                 break;             }         }         if( new_element == list->head )         {             break;         }         new_element = new_element->prev;     }   }

    /*
     * Release a single heap-allocated payload with free().
     * main() passes this function to dlist_init() for both call lists.
     */
    void destroy(void *data)
    {
        free(data);
    }

    void output(DList *list,bool calling) {     if(list==NULL) return;     DListElmt *new_element;     new_element=list->head;     int count = list->size;     int response=0;     FILE *out;     if(calling==1)     {         out=fopen("calling.txt","w");     }     else     {         out=fopen("called.txt","w");     }     char buffer[255];     while(new_element!=NULL)     {          callflow * flow = (callflow *)new_element->data;          if(flow!=NULL)          {             if(flow->response==true)             {                response++;             }             else             {                  sprintf(buffer,"billingid=%s identifier=%s\n",flow->billingid,flow->identifier);                  fwrite(buffer,strlen(buffer),1,out);             }          }          new_element=new_element->next;     }      sprintf(buffer,"count=%d response=%d\n",count,response);      fwrite(buffer,strlen(buffer),1,out);      fclose(out); }

    // int main(int argc,char * argv[]) {    //size_t lsize=0;    long long lsize=0;    const char *localpc="16592304";    DList calling_node;    DList called_node;    dlist_init(&calling_node, destroy);    dlist_init(&called_node, destroy);    char opc[3][200];    char dpc[3][200];    char identifier[2][100];    char billingID[2][200];    //FILE * fp=NULL;    //fp = fopen(argv[1],"r");    char *pBuffer=NULL;    char *pStart=NULL,*pEnd=NULL;    int fd = open(argv[1],O_RDONLY);    //size_t nFileSize=0;    //size_t nOffset=0;    //size_t nLineAmount=0;    //struct stat fileState;    //fstat(fd,&fileState);    //nFileSize=fileState.st_size;    long long nFileSize=0;    long long nOffset;    long long nLineAmount;    nFileSize =(long long)lseek(fd,0,SEEK_END);    //nFileSize = (unsigned long)fileState.st_size;    pBuffer=(char *)mmap(NULL,nFileSize,PROT_READ,MAP_SHARED,fd,0);    pEnd=pStart=pBuffer;    char line[2048];    int flag;    flag=-1;    int in=-1;    printf("nFileSize=%lld\n",nFileSize);    int load = 0;    int preload = 0;    while(lsize<nFileSize-4)    {        lsize++;        load = (int) (((float)lsize / (float)nFileSize) * 100);        if(preload!=load)        {           printf("= \n",load);           preload = load;           sleep(1);           printf("\b");        }        char a = *pEnd;        if(a==13)        {           char b = *(++pEnd);           if(b==10)           {                int len = pEnd-pStart;                if(len<1) break;                if(len>2047) break;                strncpy(line,pStart,len);                pStart=pEnd+1;                if(strstr(line," OPC")!=NULL)                {                    sscanf(line,"%s%s%s",opc[0],opc[1],opc[2]);                }                else if(strstr(line," DPC")!=NULL)                {                    sscanf(line,"%s%s%s",dpc[0],dpc[1],dpc[2]);                }                else if(strstr(line,"queryWithPerm")!=NULL)                {                   flag=0; //                } 
               else if(strstr(line,"response")!=NULL)                {                   flag=1; //                }                else if(strstr(line,"identifier")!=NULL)                {                    sscanf(line,"%s%s",identifier[0],identifier[1]);                    //                    if(strcmp(opc[1],localpc)==0)                    {                        //                        if(flag==1) //response                        {                             //找主叫流程                             updatecall(&calling_node,identifier[1]);                             //找被叫流程                             updatecall(&called_node,identifier[1]);                             flag=-1;                        }                    }                }                else if(strstr(line,"originationRequest")!=NULL)                {                    //主叫流程                    if(strstr(line,"originationRequestRes")==NULL)                    {                        in = 1;                    }                                   }                else if(strstr(line,"billingID")!=NULL)                {                   sscanf(line,"%s%s",billingID[0],billingID[1]);                   if(in==1)                   {                      callflow * calling = call_instance(identifier[1],billingID[1] ,false);                      dlist_ins_next(&calling_node,calling_node.tail,(void *)calling);                      in = -1;                   }                }                else if(strstr(line,"initial-Termination (38)")!=NULL)                {                    //被叫流程                    callflow * called = call_instance(identifier[1],billingID[1] ,false);                    dlist_ins_next(&called_node,called_node.tail,(void *)called);                }           }        }        else        {           pEnd++;        }          }    output(&calling_node,true);    output(&called_node,false);    dlist_destroy(&calling_node);    dlist_destroy(&called_node);    
munmap(pBuffer,nFileSize);    //fclose(fp);    close(fd); }

     

      ---------------------   作者:lzyzuixin   来源:   原文:https://blog.csdn.net/lzyzuixin/article/details/78928915   版权声明:本文为博主原创文章,转载请附上博文链接!

    最新回复(0)