Here is my first cut at the code. I have not run it yet.
Here is what it looks like now. I am using it in generating my web
pages.
/*
* [Lazy.java]
*
* Summary: Lets us avoid the work of expanding macros if they were
done successfully earlier.
*
* Copyright: (c) 2012-2014 Roedy Green, Canadian Mind Products,
http://mindprod.com
*
* Licence: This software may be copied and used freely for any
purpose but military.
*
http://mindprod.com/contact/nonmil.html
*
* Requires: JDK 1.7+
*
* Created with: JetBrains IntelliJ IDEA IDE
http://www.jetbrains.com/idea/
*
* Version History:
* 1.0 2014-04-21 initial version.
*/
package com.mindprod.htmlmacros.support;
import com.mindprod.common17.FNV1a64;
import com.mindprod.common17.Misc;
import com.mindprod.common17.ST;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.concurrent.TimeUnit;
import static java.lang.System.err;
/**
 * Lets us avoid the work of expanding macros if they were done
 * successfully earlier.
 * To use it:
 * 1. Call the constructor to create a Lazy object.
 * 2. Call lazy.open.
 * 3. For each file you are considering processing, call lazy.isAlreadyDone.
 * 4. After you have processed each file, call lazy.markStatus.
 * 5. When you are done, call lazy.close.
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 1.0 2014-04-21 initial version
 * @since 2014-04-21
 */
public class Lazy
{
// declarations
/**
* size of buffer in bytes
*/
private static final int BUFFERSIZE_IN_BYTES = 64 * 1024;
/**
* allow 5 seconds of slop in matching dates
*/
private static final long SLOP = TimeUnit.SECONDS.toMillis( 5 );
/**
* length of each record in the serialised cache file in bytes.
* two 64-bit longs.
*/
private static int RECORD_LENGTH = ( 64 + 64 ) / 8;
/**
* lead ignorable string on most files
*/
private String filenamePrefix;
/**
* trailing ignorable string on most files
*/
private String filenameSuffix;
/**
* binary file where we store state between runs.
*/
private File cacheFile;
/**
* look up filename hash-32 to timestamp
*/
private HashMap<Long, Long> hashToTimestamp;
// end declarations
/**
* true if this Lasy is open
*/
private boolean isOpen;
// methods
/**
* constructor
*/
public Lazy()
{
isOpen = false;
}
/**
* save the contents of the lookup cacheFIle into
embellishments/cacheFIle.bin
* It is a binary file of pairs hash-32, timestamp-64
*/
public void close()
{
if ( !isOpen )
{
return;
}
try
{
// O P E N
final DataOutputStream dos = Misc.getDataOutputStream(
this.cacheFile, BUFFERSIZE_IN_BYTES );
for ( Entry<Long, Long> entry : hashToTimestamp.entrySet()
)
{
// W R I T E
final long hash = entry.getKey();
final long timestamp = entry.getValue();
// writing in big-endian binary to be compact and
fast.
dos.writeLong( hash ); // we write int and long, not
Integer and Long.
dos.writeLong( timestamp );
}
// C L O S E
dos.close();
} // end if
catch ( IOException e )
{
err.println( ">>> Warning. Unable to write " +
this.cacheFile.getAbsolutePath() + " file "
+ e.getMessage() );
}
isOpen = false;
}// end method
/**
* has this file already been processed and is unchanged since
that time?
*
* @param file file we are processing.
*
* @return true if the file has already been successfully
processed.
*/
public boolean isAlreadyDone( File file )
{
if ( !isOpen )
{
throw new IllegalArgumentException( "Lazy.open() has not
yet been called." );
}
final long hash = calcHash( file );
final Long timestampL = hashToTimestamp.get( hash );
// if no entry, it was not registered as done.
if ( timestampL == null )
{
return false;
}
// if all is well ,the last modified date should not have
changed since we recorded the file as
// successfully processed.
if ( file.lastModified() > timestampL + SLOP )
{
// the file has been modified since we last processed it.
// we will have to reprocess it.
// This cacheFile entry is useless. We might as well get
rid of it now to save some space.
hashToTimestamp.remove( hash );
return false;
}
else
{
// it has not been touched since we last successfully
processed it.
// the cacheFile entry is fine as is. This is the whole
point, to save reprocessing it.
return true;
}
}// end method
/**
* Mark the status of this file.
*
* @param file file we are processing.
* @param status true= file successfully processed, false=file was
not successfully processed.
*/
public void markStatus( File file, boolean status )
{
if ( !isOpen )
{
throw new IllegalArgumentException( "Lazy.open() has not
yet been called." );
}
final long hash = calcHash( file );
if ( status )
{
// GOOD
// we record the fact by leaving an entry with hash/Now.
// file was just given or will soon be given a timestamp
close to this.
hashToTimestamp.put( hash, System.currentTimeMillis() );
// collisions are so rare, we do not worry about them. Two
files sharing a hash.
}
else
{
// BAD
// erase all record of it. There may be no record already
hashToTimestamp.remove( hash );
}
}// end method
/**
* Open and read the cacheFIle file.
*
* @param cacheFile cacheFile file with state from last stime
storted. If the file does not exist,
* we start over. e.g. new File(
"E:\\mindprod\\embellishments\\cacheFIle.bin")
* @param filePrefix If nearly all filenames start the same
way, the common lead string, null or "" otherwise.
* e.g. "E:\mindprod\". Use \ or / to
match the way you specify the files
* you feed to markStatus.
* @param fileSuffix If nearly all filenames end the same way,
the common trailing string, null or "" otherwise.
* e.g. ".html". Use \ or / to match the
way you specify the files
* you feed to markStatus.
* @param estimatedFiles estimate of how many files we will
process
*/
public void open( final File cacheFile, final String filePrefix,
final String fileSuffix, final int estimatedFiles )
{
if ( isOpen )
{
return;
}
this.cacheFile = cacheFile;
this.filenamePrefix = ST.canonical( filePrefix );
this.filenameSuffix = ST.canonical( fileSuffix );
if ( cacheFile.exists() && cacheFile.canRead() )
{
// load up the HashMap we use to track when files were
last successfully processed.
final int elts = Math.max( estimatedFiles, ( int )
cacheFile.length() / RECORD_LENGTH );
// allow some padding to avoid collisions
hashToTimestamp = new HashMap<>( elts + elts / 4 ); //
25% padding
// load binary long pairs from it.
DataInputStream dis = null;
try
{
try
{
// O P E N
dis = Misc.getDataInputStream( cacheFile,
BUFFERSIZE_IN_BYTES );
while ( true )
{
// R E A D pairs hash-64, timestamp-64
long hash = dis.readLong();
long timestamp = dis.readLong();
hashToTimestamp.put( hash, timestamp );
} // end loop
} // end inner try
catch ( EOFException e )
{
// nothing to do
}
finally
{
// C L O S E
if ( dis != null )
{
dis.close();
}
}
} // end outer try
catch ( IOException e )
{
err.println( ">>> Warning. Unable to read " +
cacheFile.getAbsolutePath() + " file" );
// we carry on, using as much as we could read.
}
} // end if
else
{
hashToTimestamp = new HashMap<>( estimatedFiles +
estimatedFiles / 4 );
}
isOpen = true;
}// end method
/**
* calculate a hash-64 of the name of the file, not its contents
*
* @param file file to be processed
*
* @return 64-bit FNV1a64 hash.
*/
private long calcHash( final File file )
{
// prune down if possible.
final String chopped = ST.chopLeadingString(
ST.chopTrailingString( file.getAbsolutePath(), this.filenameSuffix ),
this.filenamePrefix );
return FNV1a64.computeHash( chopped );
}// end method
// end methods
} // end class