J
JD
Hi guys
I'm trying to write a program that counts the occurrences of HTML tags
in a text file. This is what I have so far:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MB 1048576
int CountString(char *, char *);
int main(int argc, char **argv)
{
char buf[MB];
FILE *f;
char *name;
char *p;
int lines;
int count[6] = {0, 0, 0, 0, 0, 0};
int i;
if (argc == 1) {
printf("You need to specify a file on the command line\n");
return 0;
}
name = argv[1];
if ((f = fopen(name, "r")) == NULL) {
printf("Couldn't open '%s' for reading!\n", name);
return 1;
}
lines = 0;
i = 0;
while(fgets(buf, MB, f) != NULL) {
lines++;
if ((p = strrchr(buf, '\n')) != NULL) { *p = '\0'; }
/* printf("%s\n", buf); */
count[0] += CountString(buf, "<table");
count[1] += CountString(buf, "</table>");
count[2] += CountString(buf, "<tr");
count[3] += CountString(buf, "</tr>");
count[4] += CountString(buf, "<td");
count[5] += CountString(buf, "</td>");
}
for (i = 0; i < 6; i++) {
printf("count[%d] = %d\n", i, count);
}
fclose(f);
return 0;
}
int CountString(char *buf, char *str)
{
int length = strlen(str);
char *p = buf;
int count = 0;
while (strlen(p) >= length) {
if (strncmp(buf, str, length) == 0) { count++; }
p++;
}
return count;
}
If I run it on this test page:
<html>
<head>
<title>Test</title>
</head>
<body>
<table width="100%" border="1" cellspacing="0" cellpadding="0">
<tr>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
</table>
</body>
</html>
It gives:
count[0] = 59
count[1] = 1
count[2] = 0
count[3] = 0
count[4] = 0
count[5] = 0
Which is clearly not correct. Can anyone give me any pointers as to what
I'm doing wrong?
Thanks
I'm trying to write a program that counts the occurrences of HTML tags
in a text file. This is what I have so far:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MB 1048576
int CountString(char *, char *);
int main(int argc, char **argv)
{
char buf[MB];
FILE *f;
char *name;
char *p;
int lines;
int count[6] = {0, 0, 0, 0, 0, 0};
int i;
if (argc == 1) {
printf("You need to specify a file on the command line\n");
return 0;
}
name = argv[1];
if ((f = fopen(name, "r")) == NULL) {
printf("Couldn't open '%s' for reading!\n", name);
return 1;
}
lines = 0;
i = 0;
while(fgets(buf, MB, f) != NULL) {
lines++;
if ((p = strrchr(buf, '\n')) != NULL) { *p = '\0'; }
/* printf("%s\n", buf); */
count[0] += CountString(buf, "<table");
count[1] += CountString(buf, "</table>");
count[2] += CountString(buf, "<tr");
count[3] += CountString(buf, "</tr>");
count[4] += CountString(buf, "<td");
count[5] += CountString(buf, "</td>");
}
for (i = 0; i < 6; i++) {
printf("count[%d] = %d\n", i, count);
}
fclose(f);
return 0;
}
int CountString(char *buf, char *str)
{
int length = strlen(str);
char *p = buf;
int count = 0;
while (strlen(p) >= length) {
if (strncmp(buf, str, length) == 0) { count++; }
p++;
}
return count;
}
If I run it on this test page:
<html>
<head>
<title>Test</title>
</head>
<body>
<table width="100%" border="1" cellspacing="0" cellpadding="0">
<tr>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
</table>
</body>
</html>
It gives:
count[0] = 59
count[1] = 1
count[2] = 0
count[3] = 0
count[4] = 0
count[5] = 0
Which is clearly not correct. Can anyone give me any pointers as to what
I'm doing wrong?
Thanks