R
Ram Prasad
I have a system that receives jobs as files, which are stored in a
directory tree.
To get the current job queue size, I simply have to count all files
in a particular directory (including subdirectories).
The queue size may be up to 2 million files.
I can get the size by using
find /path -type f | wc -l
But this is not fast enough, so I wrote a small directory-search
program that just counts the number of files. Can I optimize it
further? Currently the program takes longer than I would like:
0.7 s for a queue size of 300k.
The program will only ever run on Linux, so I am not concerned about
portability.
#include <stdio.h>
#include <sys/types.h>
#include <dirent.h>
#include <sys/stat.h>
#include <stdlib.h>
#include <string.h>
/*
 * When STAT_MACROS_BROKEN is set, discard any existing S_ISDIR so that the
 * fallback definition further down is used instead.
 * NOTE(review): STAT_MACROS_BROKEN is not defined anywhere in this file;
 * presumably it comes from the build configuration -- confirm.
 */
#if STAT_MACROS_BROKEN
# undef S_ISDIR
#endif
/* Capacity, in bytes, for fixed-size path buffers declared with this macro. */
#define MAXPATH 1000
/*
 * Fallback S_ISDIR: if no S_ISDIR exists at this point but S_IFDIR does,
 * test the file-type bits of a mode value (masked with S_IFMT) against
 * S_IFDIR.
 */
#if !defined S_ISDIR && defined S_IFDIR
# define S_ISDIR(Mode) (((Mode) & S_IFMT) == S_IFDIR)
#endif
/*
 * Report whether `path` names a directory.
 *
 * Returns 1 when stat() succeeds and the resulting mode is a directory,
 * and 0 otherwise (including when stat() fails). Because stat() is used,
 * a symbolic link that points at a directory is reported as a directory.
 *
 * Author's note: this function is the suspected bottleneck, as it is
 * called once for every directory entry visited.
 */
int isdir (const char *path){
    struct stat info;

    if (stat(path, &info) != 0) {
        return 0;
    }
    return S_ISDIR(info.st_mode) ? 1 : 0;
}
/*
 * Recursively count the non-directory entries beneath `path`.
 *
 * Every entry whose name begins with '.' is skipped. That covers "." and
 * "..", and it also means hidden files and hidden directories are not
 * counted. Any other entry that isdir() does not report as a directory
 * counts as one file; directories are descended into.
 *
 * The "<path>/<name>" string is built in a heap buffer that grows on
 * demand, so deep trees or long names cannot overrun a fixed-size array,
 * and each recursion level keeps only a few words on the stack.
 *
 * Returns the number of entries counted, or 0 when `path` is NULL or the
 * directory cannot be opened. If memory runs out, a message is written to
 * stderr and the count gathered so far for this directory is returned.
 */
int dirnscan (const char *path){
    DIR *dp;
    struct dirent *ep;
    char *fullpath = NULL;   /* "<path>/<name>", reused for every entry */
    size_t capacity = 0;     /* bytes currently allocated for fullpath */
    size_t prefix_len;
    int n = 0;

    if (path == NULL) {
        return 0;
    }
    dp = opendir(path);
    if (dp == NULL) {
        return 0;
    }

    prefix_len = strlen(path);
    while ((ep = readdir(dp)) != NULL) {
        size_t name_len;
        size_t needed;

        if (ep->d_name[0] == '.') {
            continue;
        }

        name_len = strlen(ep->d_name);
        needed = prefix_len + 1 + name_len + 1;   /* path + '/' + name + NUL */
        if (needed > capacity) {
            char *grown = realloc(fullpath, needed);
            if (grown == NULL) {
                perror("dirnscan");
                break;
            }
            if (capacity == 0) {
                /* First allocation: write the invariant "<path>/" prefix
                 * once; later reallocs preserve it. */
                memcpy(grown, path, prefix_len);
                grown[prefix_len] = '/';
            }
            fullpath = grown;
            capacity = needed;
        }
        /* Only the entry name (with its terminator) changes per entry. */
        memcpy(fullpath + prefix_len + 1, ep->d_name, name_len + 1);

        if (isdir(fullpath)) {
            n += dirnscan(fullpath);
        } else {
            ++n;
        }
    }

    free(fullpath);
    closedir(dp);
    return n;
}
/*
 * Print the number of files found beneath the directory named by argv[1].
 *
 * Returns 0 after printing the count. When no directory argument is given,
 * prints a usage message to stderr and returns 1 instead of handing a NULL
 * path to the scanner.
 */
int main(int argc,char *argv[]){
    if (argc < 2) {
        fprintf(stderr, "usage: %s <directory>\n",
                argc > 0 ? argv[0] : "dirnscan");
        return 1;
    }
    printf("%d\n", dirnscan(argv[1]));
    return 0;
}
directory tree structure.
To get the current job queue size, I simply have to count all files
in a particular directory (including subdirectories).
The queue size may be up to 2 million files.
I can get the size by using
find /path -type f | wc -l
But this is not fast enough, so I wrote a small directory-search
program that just counts the number of files. Can I optimize it
further? Currently the program takes longer than I would like:
0.7 s for a queue size of 300k.
The program will only ever run on Linux, so I am not concerned about
portability.
#include <stdio.h>
#include <sys/types.h>
#include <dirent.h>
#include <sys/stat.h>
#include <stdlib.h>
#include <string.h>
/*
 * When STAT_MACROS_BROKEN is set, discard any existing S_ISDIR so that the
 * fallback definition further down is used instead.
 * NOTE(review): STAT_MACROS_BROKEN is not defined anywhere in this file;
 * presumably it comes from the build configuration -- confirm.
 */
#if STAT_MACROS_BROKEN
# undef S_ISDIR
#endif
/* Capacity, in bytes, for fixed-size path buffers declared with this macro. */
#define MAXPATH 1000
/*
 * Fallback S_ISDIR: if no S_ISDIR exists at this point but S_IFDIR does,
 * test the file-type bits of a mode value (masked with S_IFMT) against
 * S_IFDIR.
 */
#if !defined S_ISDIR && defined S_IFDIR
# define S_ISDIR(Mode) (((Mode) & S_IFMT) == S_IFDIR)
#endif
/*
 * Report whether `path` names a directory.
 *
 * Returns 1 when stat() succeeds and the resulting mode is a directory,
 * and 0 otherwise (including when stat() fails). Because stat() is used,
 * a symbolic link that points at a directory is reported as a directory.
 *
 * Author's note: this function is the suspected bottleneck, as it is
 * called once for every directory entry visited.
 */
int isdir (const char *path){
    struct stat info;

    if (stat(path, &info) != 0) {
        return 0;
    }
    return S_ISDIR(info.st_mode) ? 1 : 0;
}
/*
 * Recursively count the non-directory entries beneath `path`.
 *
 * Every entry whose name begins with '.' is skipped. That covers "." and
 * "..", and it also means hidden files and hidden directories are not
 * counted. Any other entry that isdir() does not report as a directory
 * counts as one file; directories are descended into.
 *
 * The "<path>/<name>" string is built in a heap buffer that grows on
 * demand, so deep trees or long names cannot overrun a fixed-size array,
 * and each recursion level keeps only a few words on the stack.
 *
 * Returns the number of entries counted, or 0 when `path` is NULL or the
 * directory cannot be opened. If memory runs out, a message is written to
 * stderr and the count gathered so far for this directory is returned.
 */
int dirnscan (const char *path){
    DIR *dp;
    struct dirent *ep;
    char *fullpath = NULL;   /* "<path>/<name>", reused for every entry */
    size_t capacity = 0;     /* bytes currently allocated for fullpath */
    size_t prefix_len;
    int n = 0;

    if (path == NULL) {
        return 0;
    }
    dp = opendir(path);
    if (dp == NULL) {
        return 0;
    }

    prefix_len = strlen(path);
    while ((ep = readdir(dp)) != NULL) {
        size_t name_len;
        size_t needed;

        if (ep->d_name[0] == '.') {
            continue;
        }

        name_len = strlen(ep->d_name);
        needed = prefix_len + 1 + name_len + 1;   /* path + '/' + name + NUL */
        if (needed > capacity) {
            char *grown = realloc(fullpath, needed);
            if (grown == NULL) {
                perror("dirnscan");
                break;
            }
            if (capacity == 0) {
                /* First allocation: write the invariant "<path>/" prefix
                 * once; later reallocs preserve it. */
                memcpy(grown, path, prefix_len);
                grown[prefix_len] = '/';
            }
            fullpath = grown;
            capacity = needed;
        }
        /* Only the entry name (with its terminator) changes per entry. */
        memcpy(fullpath + prefix_len + 1, ep->d_name, name_len + 1);

        if (isdir(fullpath)) {
            n += dirnscan(fullpath);
        } else {
            ++n;
        }
    }

    free(fullpath);
    closedir(dp);
    return n;
}
/*
 * Print the number of files found beneath the directory named by argv[1].
 *
 * Returns 0 after printing the count. When no directory argument is given,
 * prints a usage message to stderr and returns 1 instead of handing a NULL
 * path to the scanner.
 */
int main(int argc,char *argv[]){
    if (argc < 2) {
        fprintf(stderr, "usage: %s <directory>\n",
                argc > 0 ? argv[0] : "dirnscan");
        return 1;
    }
    printf("%d\n", dirnscan(argv[1]));
    return 0;
}