read binary .dat file?

G

Greg Halsey

I found a PERL script to read a dataflex database file. But I don't know
PERL. Any suggestions to translate to ruby? Is there a ruby script like
this around?

-thanks.

Here is the PERL Script:

use File::Binary ;
use Date::Calc qw(Add_Delta_Days);

# Dataflex program DFQUERY.EXE does just about the same thing as this
program.
# For info on File::Binary and Date::Calc, search on
http://search.cpan.org/
# http://search.cpan.org/~simonw/File-Binary-1.4/lib/File/Binary.pm
# http://search.cpan.org/~stbey/Date-Calc-5.4/Calc.pod
#
# This program was written with help from
http://www.wotsit.org/download.asp?f=dataflex
# If direct link does not work, go to http://www.wotsit.org and search
for dataflex
#
# I used a program similar to this in order to extract data from
dataflex and
# insert it into a microsoft access database.
#
# Feel free to ask any questions, E-mail: (e-mail address removed)
# Mattias Malmgren 2007-03-25

if ($ARGV[0] eq "") {
die "Usage DATAFLEX2TXT.PL FILENAME (BUT NOT THE SUFFIX, EG. EMPLOYEE,
NOT EMPLOYEE.DAT\n";
}

$numberrecdef{"RECORD COUNT"} = {"start" =>9 , "length" =>
4};
$numberrecdef{"RECORD LENGTH"} = {"start" =>79 , "length"
=> 2};
$numberrecdef{"HIGHEST RECORD COUNT EVER"} = {"start" =>1 ,
"length" => 4};
$numberrecdef{"MAXIMUM NUMBER OF RECORDS"} = {"start" =>13 ,
"length" => 4};
$numberrecdef{"NUMBER OF FIELDS"} = {"start" =>90 , "length"
=> 1};
$numberrecdef{"DELETED SPACE"} = {"start" =>89 , "length" =>
1};
$charrecdef{"FILE ROOT NAME"} = {"start" =>181 , "length" =>
8};
$fielddef{"FIELD OFFSET"} = {"start" =>197 , "length" => 2};
$fielddef{"FIELD LENGTH"} = {"start" =>200 , "length" => 1};
$fielddef{"FIELD TYPE"} = {"start" =>201 , "length" => 1};
$TOTALFILEDDEFLENGTH = 8;
$STARTOFFIELDDEFINITIONS = 197;
%FIELDTYPE = ("0"=>"ASCII", "1"=>"NUMERIC", "2"=>"DATE",
"3"=>"OVERLAP");
@keyorder=("FIELD OFFSET","FIELD LENGTH","FIELD TYPE");
@NOW = localtime(time);
$YEARLIMIT = $NOW[5]+1900-100;


open (FIL, "$ARGV[0].TAG");
@fieldnames = <FIL>;
close (FIL);
for ($i=0;$i<@fieldnames ;$i++) {
($fieldnames[$i] = $fieldnames[$i]) =~ s/[\r\n]//g;
}
$fb = File::Binary->new("$ARGV[0].DAT");
$fb->set_endian($LITTLE_ENDIAN);

foreach $key (sort( keys(%numberrecdef))) {
$value = &getNumberValue($numberrecdef{"$key"}->{"start"},
$numberrecdef{"$key"}->{"length"});
print qq($key = $value\n);
}
foreach $key (sort( keys(%charrecdef))) {
$value = &getCharValue($charrecdef{"$key"}->{"start"},
$charrecdef{"$key"}->{"length"});
print qq($key = $value\n);
}

$nfields = &getNumberValue($numberrecdef{"NUMBER OF FIELDS"}->{"start"},
$numberrecdef{"NUMBER OF FIELDS"}->{"length"});
$lastreadingpos=0;
undef(@fielddefs);
for ($i=0;$i<$nfields;$i++) {
$offset = $TOTALFILEDDEFLENGTH * $i;
$fnbr = $i +1;
$fieldoffset = &getNumberValue(($fielddef{"FIELD
OFFSET"}->{"start"}+$offset), $fielddef{"FIELD OFFSET"}->{"length"});
$fieldlength = &getNumberValue(($fielddef{"FIELD
LENGTH"}->{"start"}+$offset), $fielddef{"FIELD LENGTH"}->{"length"});
$fieldtype = &getNumberValue(($fielddef{"FIELD
TYPE"}->{"start"}+$offset), $fielddef{"FIELD TYPE"}->{"length"});
$fieldtype = $FIELDTYPE{$fieldtype};
push(@fielddefs, {"fieldoffset"=>$fieldoffset,
"fieldlength"=>$fieldlength, "fieldtype"=>"$fieldtype"});
}

$recordcount = &getNumberValue($numberrecdef{"RECORD COUNT"}->{"start"},
$numberrecdef{"RECORD COUNT"}->{"length"});
#$recordcount = 3; # you might wanna test with only the first 3 reccords
$recordlength = &getNumberValue($numberrecdef{"RECORD
LENGTH"}->{"start"}, $numberrecdef{"RECORD LENGTH"}->{"length"});
$offset+=8;
$startpos=0;
while ($startpos<$offset) {
$startpos += 512;
}
$startpos += $recordlength; # jump over the first reccord which is a
null reccord
$recstartpos = $startpos;
if ($recordlength<512) {
$recsin512=int(512 / $recordlength);
$fillbytes = 512 % $recordlength;
print "Using $fillbytes fillbyte(s)\n";
}

print "RECNUM;";
print join(";",@fieldnames) . "\n";
for ($recn=0;$recn<$recordcount;$recn++) {
print ($recn+1);
for ($i=0;$i<@fielddefs;$i++) {
$pos = $recstartpos+$fielddefs[$i]->{"fieldoffset"};
$length = $fielddefs[$i]->{"fieldlength"};
$type = $fielddefs[$i]->{"fieldtype"};
if ($type eq "ASCII" || $type eq "OVERLAP") {
$value = &getCharValue($pos , $length );
}
elsif ($type eq "NUMERIC") {
$value = &getDecNumberValue($pos , $length );
}
elsif ($type eq "DATE") {
$value = &getDateValue($pos , $length );
}
else {
print "\nUnknown format on rec number $recn\n";
}
print ";$value";
}
$recstartpos += $recordlength;
if ($recordlength<512) {
if ( ( ($recn+2) % $recsin512) == 0) {
$recstartpos += $fillbytes ;
}
}
print "\n";
}
$fb->close();
close (FIL);


sub getNumberValue() {
my ($start, $length) = @_;
my ($c, $sum, $i);
$fb->seek($start-1);
for ($i=0;$i<$length;$i++) {
$c= $fb->get_ui8();
$sum += $c * (256**$i);
}
return $sum;
}

sub getDecNumberValue() {
my ($start, $length) = @_;
my ($c, $sum, $i,$power,@slot,$positive);
# numeric values is eg stored as hexvalue as 10:th = (xd)
# then xd * 10**6 + xd * 10**4 + xd * 10**2 + xd*10**0
# Fantastic, I have never sean anything like it before!
# (On the other hand, I am not a programmer)
$fb->seek($start-1);
$power=0;
for ($i=1;$i<$length-1;$i++) {
$power+=2;
}
$plusminus = $fb->get_ui8(); # fist slot is == 0 for negative numbers
and >0 for positive numbers (or acuratly 10).
# read the remaining bytes
for ($i=0;$i<$length-1;$i++) {
$c=$fb->get_ui8();
push(@slot, sprintf ("%x", $c ));
}
$sum=0;
for ($i=0;$i<@slot;$i++) {
$sum += $slot[$i] * (10**$power);
$power -= 2;
}
if ($plusminus == 0) {
$sum = -1 * $sum;
}
return $sum;
}

sub getCharValue() {
my ($start, $length) = @_;
my ($c, $str, $i);
$fb->seek($start-1);
for ($i=$0;$i<$length;$i++) {
$c= $fb->get_ui8();
$str .= sprintf("%c",$c);
}
#($str = $str) =~ s/ +$//; # If you whant to get rid of trailing white
space
return $str;
}

sub getDateValue() {
my ($start, $length) = @_;
my ($c, $julianday, $i, @slot , $str);
$fb->seek($start-1);
for ($i=0;$i<$length;$i++) {
$c= $fb->get_ui8();
push (@slot, sprintf("%x",$c));
}
$julianday = ($slot[0]*(10**4)) + ($slot[1]*(10**2)) +
($slot[2]*(10**0)) ;
if ($julianday <= 100000) {
#Add_Delta_Days will die if the value is out of range.
#If that happens, perhaps you would like to increase 100000 in the
#if-statment above.
return "";
}
# I found out that the date 31 Mars 1996 is equal to 829129 accoring
to dataflex
# by comparing numbers with printouts of data from a dataflex system
$JULIANDAY19960331 = 829129;
$delta = $julianday - $JULIANDAY19960331 ;
my ($year,$month,$day) = Add_Delta_Days(1996,3,31, $delta);
if ($year<=$YEARLIMIT) {
$year+=100;
}
$str = "$year-$month-$day";
return $str;
}
 
P

Paul Stickney

Good luck; there crucial parts are
1) reading the record [fixed field sizes], look how %numberrecdef (and
friends) is setup
2) normalizing the data (just about everything below)

Part #1 should be relatively easy to solve with a quick RAA search for
treating binary data as defined by a particular format.
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

Forum statistics

Threads
473,731
Messages
2,569,432
Members
44,832
Latest member
GlennSmall

Latest Threads

Top