Merge pull request #16 from wqu-bom/develop

Improvement on file splitting
This commit is contained in:
shahramn 2019-08-22 13:49:50 +01:00 committed by GitHub
commit 89a8a533cb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 39 additions and 18 deletions

View File

@ -25,9 +25,9 @@ cp ${data_dir}/mixed.grib ./
input=mixed.grib input=mixed.grib
${tools_dir}/codes_split_file 3 $input ${tools_dir}/codes_split_file 3 $input
# There should now be 3 new files. Make sure they are valid # There should now be 3 new files. Make sure they are valid
${tools_dir}/grib_ls mixed.grib_01 ${tools_dir}/grib_ls mixed.grib_001
${tools_dir}/grib_ls mixed.grib_02 ${tools_dir}/grib_ls mixed.grib_002
${tools_dir}/grib_ls mixed.grib_03 ${tools_dir}/grib_ls mixed.grib_003
total=`${tools_dir}/codes_count mixed.grib_*` total=`${tools_dir}/codes_count mixed.grib_*`
[ $total -eq 14 ] [ $total -eq 14 ]
@ -44,7 +44,19 @@ ${tools_dir}/codes_split_file 10 $input
total=`${tools_dir}/codes_count tigge_ecmwf.grib2_[0-9]*` total=`${tools_dir}/codes_count tigge_ecmwf.grib2_[0-9]*`
[ $total -eq 248 ] [ $total -eq 248 ]
cat tigge_ecmwf.grib2_0[1-9] tigge_ecmwf.grib2_10 > $temp cat tigge_ecmwf.grib2_[0-9][0-9][0-9] > $temp
${tools_dir}/grib_compare $input $temp
# Test 3: nchunk=-1 File with 248 messages
# -----------------------------
cp ${data_dir}/tigge_ecmwf.grib2 ./
input=tigge_ecmwf.grib2
${tools_dir}/codes_split_file -1 $input
total=`${tools_dir}/codes_count tigge_ecmwf.grib2_[0-9]*`
[ $total -eq 248 ]
cat tigge_ecmwf.grib2_[0-9][0-9][0-9] > $temp
${tools_dir}/grib_compare $input $temp ${tools_dir}/grib_compare $input $temp

View File

@ -13,16 +13,19 @@
* Split an input file (GRIB, BUFR etc) into chunks of roughly the same size. * Split an input file (GRIB, BUFR etc) into chunks of roughly the same size.
* The output files are named input_01, input_02 etc. This is much faster than grib_copy/bufr_copy * The output files are named input_01, input_02 etc. This is much faster than grib_copy/bufr_copy
* *
* 2019-07-26 W.Qu Allow an input file to be split into each individual message (if nchunk=-1)
*
*/ */
#include "grib_api_internal.h" #include "grib_api_internal.h"
#include <assert.h> #include <assert.h>
static int verbose = 0; static int verbose = 0;
static const char* OUTPUT_FILENAME_FORMAT = "%s_%02d"; /* x_01, x_02 etc */ static const char* OUTPUT_FILENAME_FORMAT = "%s_%03d"; /* x_001, x_002 etc */
static void usage(const char* prog) static void usage(const char* prog)
{ {
printf("Usage: %s [-v] nchunks infile\n",prog); printf("Usage: %s [-v] nchunks infile\n",prog);
printf("nchunks=-1, split infile into individual grib/bufr message\n");
exit(1); exit(1);
} }
@ -30,8 +33,7 @@ static int split_file(FILE* in, const char* filename, const int nchunks, unsigne
{ {
void* mesg=NULL; void* mesg=NULL;
FILE* out; FILE* out;
off_t insize=0; size_t size=0,read_size=0,insize=0,chunk_size, msg_size=0, num_msg=0;
size_t size=0,read_size=0,chunk_size;
off_t offset=0; off_t offset=0;
int err=GRIB_SUCCESS; int err=GRIB_SUCCESS;
int i; int i;
@ -45,11 +47,13 @@ static int split_file(FILE* in, const char* filename, const int nchunks, unsigne
fseeko(in, 0, SEEK_END); fseeko(in, 0, SEEK_END);
insize = ftello(in); insize = ftello(in);
if (insize==-1)
return 1;
fseeko(in, 0, SEEK_SET); fseeko(in, 0, SEEK_SET);
assert(nchunks > 0); if(nchunks == -1){
chunk_size=insize/nchunks; chunk_size = size;
}else{
assert(nchunks > 0);
chunk_size=insize/nchunks;
}
i=1; i=1;
sprintf(ofilename, OUTPUT_FILENAME_FORMAT, filename, i); sprintf(ofilename, OUTPUT_FILENAME_FORMAT, filename, i);
@ -62,8 +66,10 @@ static int split_file(FILE* in, const char* filename, const int nchunks, unsigne
while ( err!=GRIB_END_OF_FILE ) { while ( err!=GRIB_END_OF_FILE ) {
mesg=wmo_read_any_from_file_malloc(in, 0, &size, &offset, &err); mesg=wmo_read_any_from_file_malloc(in, 0, &size, &offset, &err);
if (mesg!=NULL && err==0) { num_msg++;
if (fwrite(mesg,1,size,out)!=size) { /*printf("=1=%d\t%d\t%d\n",*count,size,insize);*/
if ( mesg!=NULL && err==0 ) {
if (fwrite(mesg,1,size,out)!=size ) {
perror(ofilename); perror(ofilename);
free(ofilename); free(ofilename);
fclose(out); fclose(out);
@ -71,11 +77,13 @@ static int split_file(FILE* in, const char* filename, const int nchunks, unsigne
} }
grib_context_free(c,mesg); grib_context_free(c,mesg);
read_size+=size; read_size+=size;
if (read_size>chunk_size) { msg_size+=size;
if (verbose) printf("Wrote output file %s\n", ofilename); if (read_size>chunk_size && msg_size < insize) {
if (verbose) printf("Wrote output file %s (%d msgs)\n", ofilename, num_msg);
fclose(out); fclose(out);
i++; i++;
/* Start writing to the next file */ /* Start writing to the next file */
/*printf("=2=%d\t%d\n",*count,msg_size);*/
sprintf(ofilename, OUTPUT_FILENAME_FORMAT, filename, i); sprintf(ofilename, OUTPUT_FILENAME_FORMAT, filename, i);
out=fopen(ofilename,"w"); out=fopen(ofilename,"w");
if (!out) { if (!out) {
@ -84,11 +92,12 @@ static int split_file(FILE* in, const char* filename, const int nchunks, unsigne
return GRIB_IO_PROBLEM; return GRIB_IO_PROBLEM;
} }
read_size=0; read_size=0;
num_msg=0;
} }
(*count)++; (*count)++;
} }
} }
if (verbose) printf("Wrote output file %s\n", ofilename); if (verbose) printf("Wrote output file %s (%d msgs)\n", ofilename,num_msg-1);
fclose(out); fclose(out);
free(ofilename); free(ofilename);
@ -117,8 +126,8 @@ int main(int argc,char* argv[])
/* add some error checking */ /* add some error checking */
nchunks=atoi(argv[i]); nchunks=atoi(argv[i]);
if (nchunks<1) { if (nchunks<1 && nchunks!=-1) {
fprintf(stderr,"ERROR: Invalid number %d. Please specify a positive integer.\n", nchunks); fprintf(stderr,"ERROR: Invalid number %d. Please specify a positive integer. or -1 for spliting each message\n", nchunks);
return 1; return 1;
} }