Merge files

Discussion in 'Perl Misc' started by George Mpouras, Aug 7, 2013.

  1. # I am writing something, so here is a subroutine from it that you may like.
    # It simply merges files, a little more carefully than usual.



    #!/usr/bin/perl
    use strict;
    use warnings;


    # Example invocation: merge every plain file found in r:/tmp/dir into
    # /tmp/big.txt, aborting with the OS error text when the merge reports
    # failure.
    my %merge_args = (
        DIR        => 'r:/tmp/dir',
        OUTPUTFILE => '/tmp/big.txt',
    );
    die "oups $^E\n" unless MergeFiles(%merge_args);


    # Merge all plain files that exist in a directory into one big file.
    #
    # To keep copying to a minimum, the smaller files are appended to the
    # biggest file, which is finally renamed to OUTPUTFILE. Files are taken
    # biggest first; files of equal size are taken newest first, to help any
    # potential sort later. Blank lines are skipped and trailing whitespace is
    # stripped from every appended line. Each source file is deleted once its
    # lines have been written to the target.
    #
    # Named arguments (both required):
    #   DIR        - directory whose plain files are merged (not recursive)
    #   OUTPUTFILE - path the merged file is renamed to
    #
    # Returns 1 on success (also when DIR contains no plain files) and 0 when
    # the directory or one of the files cannot be opened. Dies when a required
    # argument is missing, or when writing, deleting or renaming a file fails.
    #
    # MergeFiles( DIR => '/tmp' , OUTPUTFILE => '/tmp/big' ) || die;
    #
    sub MergeFiles
    {
        my %option = @_;
        for my $required (qw/DIR OUTPUTFILE/) {
            exists $option{$required}
                or die "The \"$required\" argument is missing from ".(caller 0)[3]."\n";
        }

        # Collect [ path, size, age in days ] for every plain file in DIR.
        opendir my $dir_handle, $option{DIR} or return 0;
        my @File;
        # Explicit defined() so that a file literally named "0" does not end
        # the loop, and a named variable so that $_ is not relied upon.
        while (defined(my $name = readdir $dir_handle)) {
            my $path = "$option{DIR}/$name";
            next unless -f $path;
            push @File, [ $path, -s _, -M _ ];    # "_" reuses the stat done by -f
        }
        closedir $dir_handle;
        return 1 unless @File;

        # Biggest file first; among equal sizes the newest (smallest -M) first.
        my @FileSorted = map  { $_->[0] }
                         sort { $b->[1] <=> $a->[1] || $a->[2] <=> $b->[2] } @File;

        if (@FileSorted > 1) {
            my $target = $FileSorted[0];

            # Put a final new line character at the end of the biggest file
            # (the one we merge into) in case it does not have one, so that
            # the first appended line is not glued to its last line.
            open my $probe, '<:raw', $target or return 0;
            my $last_char = '';
            # Whence 2 means "relative to end of file". The seek fails on an
            # empty file, which then needs no newline at all.
            if (seek $probe, -1, 2) {
                read $probe, $last_char, 1;
            }
            close $probe;
            my $needs_newline = defined $last_char
                             && length $last_char
                             && $last_char ne chr 10;

            if ($needs_newline) {
                open my $out, '>>', $target or return 0;
                print {$out} "\n"
                    or die "Could not write to file \"$target\" because \"$^E\"\n";
                close $out
                    or die "Could not write to file \"$target\" because \"$^E\"\n";
            }

            for my $source (@FileSorted[1 .. $#FileSorted]) {
                open my $in,  '<',  $source or return 0;
                open my $out, '>>', $target or return 0;
                while (my $line = <$in>) {
                    next if $line =~ /^\s*$/;    # skip blank lines
                    $line =~ s/\s+\z//;          # drop newline and trailing blanks
                    print {$out} "$line\n"
                        or die "Could not write to file \"$target\" because \"$^E\"\n";
                }
                close $in;
                # Close (and thereby flush) the target BEFORE deleting the
                # source, so the source is only removed once its data is
                # safely written.
                close $out
                    or die "Could not write to file \"$target\" because \"$^E\"\n";
                # "or", not "||": "||" would bind to the file name and the
                # die could never fire.
                unlink $source
                    or die "Could not delete file \"$source\" because \"$^E\"\n";
            }
        }

        unless ($FileSorted[0] eq $option{OUTPUTFILE}) {
            rename($FileSorted[0], $option{OUTPUTFILE})
                or die "Could not rename file \"$FileSorted[0]\" to \"$option{OUTPUTFILE}\" because \"$^E\"\n";
        }
        return 1;
    }
    George Mpouras, Aug 7, 2013
    #1
    1. Advertising

  2. A small bug fix:









    # Merge all plain files that exist in a directory into one big file.
    #
    # To keep copying to a minimum, the smaller files are appended to the
    # biggest file, which is finally renamed to OUTPUTFILE. Files are taken
    # biggest first; files of equal size are taken newest first, to help any
    # potential sort later. Blank lines are skipped and trailing whitespace is
    # stripped from every appended line. Each source file is deleted once its
    # lines have been written to the target.
    #
    # Named arguments (both required):
    #   DIR        - directory whose plain files are merged (not recursive)
    #   OUTPUTFILE - path the merged file is renamed to
    #
    # Returns 1 on success (also when DIR contains no plain files) and 0 when
    # the directory or one of the files cannot be opened. Dies when a required
    # argument is missing, or when writing, deleting or renaming a file fails.
    #
    # MergeFiles( DIR => '/tmp' , OUTPUTFILE => '/tmp/big' ) || die;
    #
    sub MergeFiles
    {
        my %option = @_;
        for my $required (qw/DIR OUTPUTFILE/) {
            exists $option{$required}
                or die "The \"$required\" argument is missing from ".(caller 0)[3]."\n";
        }

        # Collect [ path, size, age in days ] for every plain file in DIR.
        opendir my $dir_handle, $option{DIR} or return 0;
        my @File;
        # Explicit defined() so that a file literally named "0" does not end
        # the loop.
        while (defined(my $name = readdir $dir_handle)) {
            my $path = "$option{DIR}/$name";
            next unless -f $path;
            push @File, [ $path, -s _, -M _ ];    # "_" reuses the stat done by -f
        }
        closedir $dir_handle;
        return 1 unless @File;

        # Biggest file first; among equal sizes the newest (smallest -M) first.
        my @FileSorted = map  { $_->[0] }
                         sort { $b->[1] <=> $a->[1] || $a->[2] <=> $b->[2] } @File;

        if (@FileSorted > 1) {
            my $target = $FileSorted[0];

            # Put a final new line character at the end of the biggest file
            # (the one we merge into) in case it does not have one, so that
            # the first appended line is not glued to its last line.
            open my $probe, '<:raw', $target or return 0;
            my $last_char = '';
            # Whence 2 means "relative to end of file". The seek fails on an
            # empty file, which then needs no newline at all.
            if (seek $probe, -1, 2) {
                read $probe, $last_char, 1;
            }
            close $probe;
            my $needs_newline = defined $last_char
                             && length $last_char
                             && $last_char ne chr 10;

            if ($needs_newline) {
                open my $out, '>>', $target or return 0;
                print {$out} "\n"
                    or die "Could not write to file \"$target\" because \"$^E\"\n";
                close $out
                    or die "Could not write to file \"$target\" because \"$^E\"\n";
            }

            for my $source (@FileSorted[1 .. $#FileSorted]) {
                open my $in,  '<',  $source or return 0;
                open my $out, '>>', $target or return 0;
                while (my $line = <$in>) {
                    next if $line =~ /^\s*$/;    # skip blank lines
                    $line =~ s/\s+\z//;          # drop newline and trailing blanks
                    print {$out} "$line\n"
                        or die "Could not write to file \"$target\" because \"$^E\"\n";
                }
                close $in;
                # Close (and thereby flush) the target BEFORE deleting the
                # source, so the source is only removed once its data is
                # safely written.
                close $out
                    or die "Could not write to file \"$target\" because \"$^E\"\n";
                # "or", not "||": "||" would bind to the file name and the
                # die could never fire.
                unlink $source
                    or die "Could not delete file \"$source\" because \"$^E\"\n";
            }
        }

        unless ($FileSorted[0] eq $option{OUTPUTFILE}) {
            rename($FileSorted[0], $option{OUTPUTFILE})
                or die "Could not rename file \"$FileSorted[0]\" to \"$option{OUTPUTFILE}\" because \"$^E\"\n";
        }
        return 1;
    }
    George Mpouras, Aug 8, 2013
    #2
    1. Advertising

Want to reply to this thread or ask your own question?

It takes just 2 minutes to sign up (and it's free!). Just click the sign up button to choose a username and then you can ask your own questions on the forum.
Similar Threads
  1. =?Utf-8?B?a3Jz?=

    merge .pdf files

    =?Utf-8?B?a3Jz?=, Apr 6, 2005, in forum: ASP .Net
    Replies:
    2
    Views:
    5,692
    flo2007
    Jun 1, 2007
  2. John Huntjens

    merge of 2 xml files

    John Huntjens, Sep 14, 2003, in forum: XML
    Replies:
    0
    Views:
    727
    John Huntjens
    Sep 14, 2003
  3. Luke Airig
    Replies:
    1
    Views:
    914
    Patrick TJ McPhee
    Dec 16, 2003
  4. Luke Airig
    Replies:
    0
    Views:
    631
    Luke Airig
    Dec 21, 2003
  5. Eirik Eldorsen

    [Ant] Merge two xml files

    Eirik Eldorsen, Jan 24, 2004, in forum: XML
    Replies:
    1
    Views:
    6,300
    Brian Agnew
    Apr 12, 2004
Loading...

Share This Page