Peter said:
What other utility? As Tim says, there are many CRC32s... the
background notes on this one happen to stumble out at the top of the
list in response to googling for "zip file crc32 checksum polynomial",
though I'm sure there are easier ways. The binascii docs say its CRC32
is compatible with the Zip file checksum, but they don't describe it
further.
Generally CRCs are described in terms of their "polynomial", though just
quoting that isn't sufficient to describe their behaviour, but if you
happen to know the polynomial for your utility, someone else can
probably point you to a more appropriate routine, or perhaps explain
what you were doing wrong if the binascii one is actually the right one.
-Peter
It was a .DLL written by an employee who has long since
left the company. We want to move the code to Linux for
nightly checking of files. I don't know what else to do but
post some long code. See below:
/************************************************************************
INCLUDES
************************************************************************/
#include <windows.h>
#include <string.h>
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include "filecrc.h"
/************************************************************************
ModuleName: filecrc.c
Author: Syscon Computers - Modified by Barry Weck
Project: PowerBuilder External File CRC function
Date created: May. 18, 1999
Last Modified: Jun. 09, 1999
Module Owner: Syscon Computers
Module description:
This module implements an algorithm for calculating the CRC (Cyclic
Redundancy Check) of a binary file. This function is meant to be
compiled as a 16 bit Microsoft Windows (Tm) DLL with either the 16 bit C
compilers of Borland or Microsoft. Compilation under other
compilers has not been tested. The requirement of "16-bit DLL" is
necessitated by the version of PowerBuilder used by syscon.
This module was written by a third party and was then modified by
subcontractor Barry Weck. The code is copyrighted and owned by Syscon
Computers, Inc. of Tuscaloosa Alabama (C) 1999.
************************************************************************/
/************************************************************************
DATA
************************************************************************/
/* 256-entry CRC lookup table, one 32-bit value per possible byte.
   CalculateCrc indexes it with (crc ^ data byte) & 0xff and XORs the
   entry into the CRC shifted right by 8 bits.
   NOTE(review): entry 128 is 0xEDB88320, the reflected polynomial of the
   common zip/zlib CRC-32, so despite the "ccitt" name this looks like the
   standard CRC-32 table - confirm against a generated table.
   NOTE(review): the last entry carries an L suffix instead of UL; the
   value fits in a signed long, so the stored value is the same. */
static unsigned long ccitt_32[256] =
{
0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL,
0x0edb8832UL, 0x79dcb8a4UL, 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL,
0x7eb17cbdUL, 0xe7b82d07UL, 0x90bf1d91UL,
0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, 0x1adad47dUL,
0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL,
0x136c9856UL, 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL,
0x63066cd9UL, 0xfa0f3d63UL, 0x8d080df5UL,
0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, 0xa2677172UL, 0x3c03e4d1UL,
0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL,
0x26d930acUL, 0x51de003aUL, 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL,
0x56b3c423UL, 0xcfba9599UL, 0xb8bda50fUL,
0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, 0x2f6f7c87UL,
0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL,
0x76dc4190UL, 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL,
0x06b6b51fUL, 0x9fbfe4a5UL, 0xe8b8d433UL,
0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, 0xe10e9818UL, 0x7f6a0dbbUL,
0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL,
0x65b0d9c6UL, 0x12b7e950UL, 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL,
0x15da2d49UL, 0x8cd37cf3UL, 0xfbd44c65UL,
0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, 0x4adfa541UL,
0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL,
0x4369e96aUL, 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL,
0x33031de5UL, 0xaa0a4c5fUL, 0xdd0d7cc9UL,
0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, 0xc90c2086UL, 0x5768b525UL,
0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL,
0xedb88320UL, 0x9abfb3b6UL, 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL,
0x9dd277afUL, 0x04db2615UL, 0x73dc1683UL,
0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, 0xe40ecf0bUL,
0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL,
0xf00f9344UL, 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL,
0x806567cbUL, 0x196c3671UL, 0x6e6b06e7UL,
0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, 0x67dd4accUL, 0xf9b9df6fUL,
0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL,
0xd80d2bdaUL, 0xaf0a1b4cUL, 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL,
0xa867df55UL, 0x316e8eefUL, 0x4669be79UL,
0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, 0xcc0c7795UL,
0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL,
0xc5ba3bbeUL, 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL,
0xb5d0cf31UL, 0x2cd99e8bUL, 0x5bdeae1dUL,
0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, 0x026d930aUL, 0x9c0906a9UL,
0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL,
0x86d3d2d4UL, 0xf1d4e242UL, 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL,
0xf6b9265bUL, 0x6fb077e1UL, 0x18b74777UL,
0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, 0x8f659effUL,
0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL,
0xa00ae278UL, 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL,
0xd06016f7UL, 0x4969474dUL, 0x3e6e77dbUL,
0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, 0x37d83bf0UL, 0xa9bcae53UL,
0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL,
0xb3667a2eUL, 0xc4614ab8UL, 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL,
0xc30c8ea1UL, 0x5a05df1bUL, 0x2d02ef8dL
};
/************************************************************************
Function : LibMain
Description : DLL entry point. Performs no initialisation of its own.
Parameters : HINSTANCE hInst - unused
WORD wDataSeg - unused
WORD cbHeapSize - unused (see note in body)
LPSTR lpszCmdLine - unused
Returns : Always 1
************************************************************************/
int WINAPI LibMain(HINSTANCE hInst, WORD wDataSeg, WORD cbHeapSize, LPSTR
lpszCmdLine)
{
    /* The original body was
           if (cbHeapSize != 0)
               UnLockData(0);      (commented out)
           return 1;
       With the call commented out, "return 1" became the body of the if,
       so the function ended without returning a value whenever
       cbHeapSize was 0. The return is now unconditional. */
    return 1;
}
/************************************************************************
Function : WEP
Description : DLL exit procedure. Does no cleanup; every value of
nParameter is answered the same way.
Parameters : int nParameter - ignored
Returns : Always 1
************************************************************************/
int WINAPI WEP(int nParameter)
{
    return 1;
}
/************************************************************************
Function : CalculateCrc
Description : Folds a chunk of bytes into a running 32-bit CRC using the
ccitt_32 lookup table. Call it repeatedly, feeding the
returned value back in as crc, for as long as there are
more chunks of bytes.
Parameters : BYTE *buffer - next chunk of bytes to fold into the CRC
long count - number of bytes in this chunk; a count of zero
or less leaves the CRC unchanged
DWORD crc - current value of the CRC
short *sError - kept for interface compatibility only; never
written (see note in body)
Returns : DWORD CalculateCrc - updated CRC
************************************************************************/
DWORD CalculateCrc(BYTE *buffer, long count, DWORD crc, short *sError)
{
    unsigned int Index;

    /* The table index is masked with 0xff, so it is always 0..255 and the
       former "array overrun" check could never fire. It has been removed;
       sError is therefore left untouched. */
    (void)sError;

    /* "count > 0" rather than "count != 0": a negative count used to run
       the loop far past the end of buffer. */
    while ( count-- > 0 )
    {
        /* Low byte of the CRC combined with the next data byte selects
           the table entry; the arithmetic stays unsigned throughout. */
        Index = (unsigned int)(( crc ^ *buffer++ ) & 0xffUL);
        /* Mask after the shift so only 24 bits survive even if DWORD is
           wider than 32 bits on the target. */
        crc = (( crc >> 8 ) & 0x00FFFFFFUL) ^ ccitt_32[Index];
    }
    return( crc );
}
/************************************************************************
Function : long filesize(FILE *stream)
Description : Returns the size of a file given an already opened file
stream. The stream's current position is saved first and
restored afterwards.
NOTE: the size is reported as a long, so it is limited to
what a long can hold.
Parameters : FILE *stream - already opened, seekable file stream
Returns : Size of the file in bytes, or -1 if the position could not
be queried, moved to the end, or restored
************************************************************************/
long filesize(FILE *stream)
{
    long curpos, length;

    /* Remember where the caller was; without a valid position there is
       nothing sensible to seek back to. */
    curpos = ftell(stream);
    if (curpos < 0L)
        return -1L;

    if (fseek(stream, 0L, SEEK_END) != 0)
        return -1L;
    length = ftell(stream);   /* -1 here is passed through as the error */

    /* Put the stream back; report failure so the caller does not go on
       reading from an unexpected position. */
    if (fseek(stream, curpos, SEEK_SET) != 0)
        return -1L;

    return length;
}
/************************************************************************
Function : GetCheckSum
Description : Computes the CRC of a byte range of a file by reading it in
chunks and passing each chunk to CalculateCrc.
NOTE: file size and offsets are held in a long.
Parameters : char* pFileName - name of data file we are calculating CRC for
long lOffset - offset within file; must be >= 0 and less than
the file size
long lLength - how much to check (0 or less means check the
entire file from the offset)
short sBufferSize - size of buffer used to read data; 0 or less
selects FC_DEFAULT_BUFFER_SIZE
short *sError - receives the error code - see #defines in
filecrc.h; FC_ERR_NOERROR on success
unsigned long *retval - on entry: starting value of the CRC;
on exit: CRC of the range if no error
occurred, otherwise zero
Returns : Ignore the return value - Always equal to 1
************************************************************************/
int WINAPI GetCheckSum(char *pFileName, long lOffset, long lLength,
short sBufferSize, short *sError, unsigned long *retval)
{
    BYTE *pBuffer = NULL;        /* Current chunk of bytes from file */
    FILE *pDataFile;             /* File Pointer */
    long FileSize;
    long lBytesLeftToRead;
    size_t nWant, nGot;
    DWORD dwCrc = *retval;       /* Running CRC, seeded by the caller */

    *sError = FC_ERR_NOERROR;    /* Assume no problems */
    *retval = 0;

    if ((pDataFile = fopen(pFileName,"rb")) == NULL)
    {
        *sError = FC_ERR_FILENOTFOUND;   /* Can't find the file */
        return(1);
    }

    FileSize = filesize(pDataFile);
    if (sBufferSize <= 0)
        sBufferSize = FC_DEFAULT_BUFFER_SIZE;

    /* Validate the requested range BEFORE sizing or allocating the buffer.
       Previously the buffer size was derived from a possibly negative
       length, and a negative offset was not rejected at all. A failed
       filesize() (-1) also lands in the first branch. */
    if ((lOffset < 0) || (lOffset >= FileSize))
        *sError = FC_ERR_INVALIDOFFSET;              /* offset outside file */
    else
    {
        /* If Length is 0, read the rest of the file FROM THE OFFSET */
        if (lLength <= 0)
            lLength = FileSize - lOffset;
        /* Written as a subtraction so lOffset+lLength cannot overflow */
        if (lLength > FileSize - lOffset)
            *sError = FC_ERR_INVALIDOFFSETPLUSLENGTH; /* range runs past EOF */
    }

    if (*sError == FC_ERR_NOERROR)
    {
        /* lLength is at least 1 here, so the buffer size stays positive */
        if (sBufferSize > lLength)
            sBufferSize = (short)lLength;
        pBuffer = (BYTE *)malloc((size_t)sBufferSize * sizeof(BYTE));
        if (pBuffer == NULL)
            *sError = FC_ERR_OUTOFMEMORY;            /* out of heap */
        else if (fseek(pDataFile,lOffset,SEEK_SET) != 0)
            *sError = FC_ERR_INVALIDOFFSET;          /* could not reach offset */
    }

    if (*sError == FC_ERR_NOERROR)
    {
        lBytesLeftToRead = lLength;
        while (lBytesLeftToRead > 0)
        {
            /* Never ask for more than is left in the requested range */
            nWant = (lBytesLeftToRead < (long)sBufferSize)
                        ? (size_t)lBytesLeftToRead
                        : (size_t)sBufferSize;
            nGot = fread(pBuffer,sizeof(BYTE),nWant,pDataFile);
            if (nGot == 0)
                break;   /* EOF or read error: stop. The old loop only
                            tested feof(), so a read error spun forever. */

            dwCrc = CalculateCrc(pBuffer,(long)nGot,dwCrc,sError);
            if (*sError != FC_ERR_NOERROR)
                break;   /* CalculateCrc reported a problem */

            lBytesLeftToRead -= (long)nGot;
        }

        /* Fewer bytes than requested were available, so the CRC would not
           cover the requested range. NOTE(review): no dedicated read-error
           code is visible from this file, so the "past EOF" code is reused;
           confirm against filecrc.h. */
        if ((*sError == FC_ERR_NOERROR) && (lBytesLeftToRead > 0))
            *sError = FC_ERR_INVALIDOFFSETPLUSLENGTH;

        if (*sError == FC_ERR_NOERROR)
            *retval = dwCrc;   /* on any error retval stays zero */
    }

    free(pBuffer);             /* clean up; free(NULL) is harmless */
    fclose(pDataFile);
    return(1);
}
I converted the CalculateCrc function to pure Python and it gives me the
proper results (on small files), but it is so slow on large files (we are
processing 650+ MB files) as to be unusable.
import time
import struct
import os
import binascii
#
# Table for lookups: 256 entries, one per possible byte value.
# CalculateCrc indexes it with (crc ^ byte) & 0xff.
# The values appear to be a transcription of the C ccitt_32 table in
# filecrc.c (NOTE(review): spot-checked only - confirm all 256 match).
# The trailing "L" marks Python 2 long-integer literals.
#
ccitt_32=[
0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
0x2d02ef8dL
]
def CalculateCrc(buffer, crc):
    """Fold the bytes of `buffer` into the running CRC `crc`.

    Uses the table lookup method with the module-level ccitt_32 table.
    Call repeatedly, feeding the returned CRC back in, to process a file
    in chunks.

    buffer -- Python 2 byte string; each character is one data byte
    crc    -- current CRC value (the seed on the first call)

    Returns a (crc, error) tuple. The error string is always '': the
    table index is masked to 0..255, so the former array-overrun check
    could never fire and has been removed.
    """
    # Bind the table to a local name once; reading a global inside the
    # loop is slower, and no `global` statement is needed just to read it.
    table = ccitt_32
    for c in buffer:
        # ord() replaces struct.unpack('B', c)[0]: same value without
        # building a tuple per byte. Masking the shifted CRC to 24 bits
        # mirrors the C code, so a negative seed (e.g. -1 for 0xFFFFFFFF)
        # gives the same result as the C routine.
        crc = ((crc >> 8) & 0x00FFFFFF) ^ table[(crc ^ ord(c)) & 0xff]
    return crc, ''
The algorithm looks very much like the source code for
binascii.crc32 (but I'm not a C programmer).
Thanks for everyone's help.
Regards,
Larry Bates