C program to count occurrences of substrings in strings

J

JD

Hi guys

I'm trying to write a program that counts the occurrences of HTML tags
in a text file. This is what I have so far:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MB 1048576

int CountString(char *, char *);

/*
 * Reads the text file named by argv[1] line by line and prints, for each of
 * six HTML table tag strings, how many times CountString() found it.
 *
 * Returns 0 after printing the totals (and, as before, when no file name was
 * supplied) and 1 when the file cannot be opened.
 */
int main(int argc, char **argv)
{
    /* static storage: a 1 MB automatic array can exhaust the default stack
     * on some platforms. */
    static char buf[MB];
    FILE *f;
    char *name;
    char *p;
    int count[6] = {0, 0, 0, 0, 0, 0};
    int i;

    /* argc < 2 also covers argc == 0, where argv[1] must not be read. */
    if (argc < 2) {
        printf("You need to specify a file on the command line\n");
        return 0;
    }

    name = argv[1];

    if ((f = fopen(name, "r")) == NULL) {
        printf("Couldn't open '%s' for reading!\n", name);
        return 1;
    }

    while (fgets(buf, MB, f) != NULL) {
        /* fgets() keeps the newline; cut the line there. */
        if ((p = strrchr(buf, '\n')) != NULL) { *p = '\0'; }

        count[0] += CountString(buf, "<table");
        count[1] += CountString(buf, "</table>");
        count[2] += CountString(buf, "<tr");
        count[3] += CountString(buf, "</tr>");
        count[4] += CountString(buf, "<td");
        count[5] += CountString(buf, "</td>");
    }

    for (i = 0; i < 6; i++) {
        /* Print the element: passing the array itself to %d is undefined. */
        printf("count[%d] = %d\n", i, count[i]);
    }

    fclose(f);
    return 0;
}

/*
 * Intended to return how many times `str` occurs in `buf`.
 *
 * `p` steps through `buf` one character at a time for as long as at least
 * strlen(str) characters remain after it.
 *
 * NOTE(review): the strncmp() call compares from `buf`, not from `p`, so
 * every iteration re-tests the start of the line. This is the defect the
 * question is about; the code is left as posted.
 */
int CountString(char *buf, char *str)
{
int length = strlen(str);
char *p = buf;
int count = 0;

/* Loop while the text remaining at p is at least as long as str. */
while (strlen(p) >= length) {
/* Compares the first `length` characters of buf (not p) with str. */
if (strncmp(buf, str, length) == 0) { count++; }
p++;
}

return count;
}

If I run it on this test page:


<html>
<head>
<title>Test</title>
</head>
<body>

<table width="100%" border="1" cellspacing="0" cellpadding="0">
<tr>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
</table>
</body>
</html>

It gives:

count[0] = 59
count[1] = 1
count[2] = 0
count[3] = 0
count[4] = 0
count[5] = 0

Which is clearly not correct. Can anyone give me any pointers as to what
I'm doing wrong?

Thanks
 
S

slebetman

JD said:
/*
 * Quoted from the question: intended to return how many times `str` occurs
 * in `buf`, stepping `p` through `buf` one character at a time.
 */
int CountString(char *buf, char *str)
{
int length = strlen(str);
char *p = buf;
int count = 0;

while (strlen(p) >= length) {
/* Compares from buf rather than p, so the start of the line is tested
 * on every iteration. */
if (strncmp(buf, str, length) == 0) { count++; }
p++;
}

return count;
}

If I run it on this test page:


<html>
<head>
<title>Test</title>
</head>
<body>

<table width="100%" border="1" cellspacing="0" cellpadding="0">
<tr>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
</table>
</body>
</html>

It gives:

count[0] = 59
count[1] = 1
count[2] = 0
count[3] = 0
count[4] = 0
count[5] = 0

Which is clearly not correct. Can anyone give me any pointers as to what
I'm doing wrong?

Thanks

You might want to try:

/*
 * Counts the occurrences of `str` in `buf`.
 *
 * buf: NUL-terminated text to search.
 * str: NUL-terminated string to look for.
 *
 * Returns the number of positions in `buf` at which `str` begins.
 * Overlapping occurrences are counted separately ("aa" occurs three times
 * in "aaaa"). An empty `str` yields 0.
 */
int CountString(char *buf, char *str)
{
    size_t length = strlen(str);
    /* Measured once and decremented below, rather than calling strlen(p)
     * on every iteration. */
    size_t remaining = strlen(buf);
    char *p = buf;
    int count = 0;

    /* With length 0 the loop condition could never become false and p
     * would walk past the terminator. */
    if (length == 0) { return 0; }

    while (remaining >= length) {
        /* Compare at the current position p, not at the start of buf. */
        if (strncmp(p, str, length) == 0) { count++; }
        p++;
        remaining--;
    }

    return count;
}

You only advanced 'p', 'buf' remains at the same location in the
string.
I personally prefer:

while (strlen(buf) >= length) {
if (strncmp(buf, str, length) == 0) { count++; }
buf++;
}

The 'p' pointer is not strictly necessary.
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

No members online now.

Forum statistics

Threads
473,764
Messages
2,569,564
Members
45,039
Latest member
CasimiraVa

Latest Threads

Top