mirror of https://github.com/ecmwf/eccodes.git
Merge pull request #16 from wqu-bom/develop
Improvement on file splitting
This commit is contained in:
commit
89a8a533cb
|
@ -25,9 +25,9 @@ cp ${data_dir}/mixed.grib ./
|
||||||
input=mixed.grib
|
input=mixed.grib
|
||||||
${tools_dir}/codes_split_file 3 $input
|
${tools_dir}/codes_split_file 3 $input
|
||||||
# There should now be 3 new files. Make sure they are valid
|
# There should now be 3 new files. Make sure they are valid
|
||||||
${tools_dir}/grib_ls mixed.grib_01
|
${tools_dir}/grib_ls mixed.grib_001
|
||||||
${tools_dir}/grib_ls mixed.grib_02
|
${tools_dir}/grib_ls mixed.grib_002
|
||||||
${tools_dir}/grib_ls mixed.grib_03
|
${tools_dir}/grib_ls mixed.grib_003
|
||||||
|
|
||||||
total=`${tools_dir}/codes_count mixed.grib_*`
|
total=`${tools_dir}/codes_count mixed.grib_*`
|
||||||
[ $total -eq 14 ]
|
[ $total -eq 14 ]
|
||||||
|
@ -44,7 +44,19 @@ ${tools_dir}/codes_split_file 10 $input
|
||||||
total=`${tools_dir}/codes_count tigge_ecmwf.grib2_[0-9]*`
|
total=`${tools_dir}/codes_count tigge_ecmwf.grib2_[0-9]*`
|
||||||
[ $total -eq 248 ]
|
[ $total -eq 248 ]
|
||||||
|
|
||||||
cat tigge_ecmwf.grib2_0[1-9] tigge_ecmwf.grib2_10 > $temp
|
cat tigge_ecmwf.grib2_[0-9][0-9][0-9] > $temp
|
||||||
|
${tools_dir}/grib_compare $input $temp
|
||||||
|
|
||||||
|
|
||||||
|
# Test 3: nchunks=-1 (one message per output file). File with 248 messages
|
||||||
|
# -----------------------------
|
||||||
|
cp ${data_dir}/tigge_ecmwf.grib2 ./
|
||||||
|
input=tigge_ecmwf.grib2
|
||||||
|
${tools_dir}/codes_split_file -1 $input
|
||||||
|
total=`${tools_dir}/codes_count tigge_ecmwf.grib2_[0-9]*`
|
||||||
|
[ $total -eq 248 ]
|
||||||
|
|
||||||
|
cat tigge_ecmwf.grib2_[0-9][0-9][0-9] > $temp
|
||||||
${tools_dir}/grib_compare $input $temp
|
${tools_dir}/grib_compare $input $temp
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -13,16 +13,19 @@
|
||||||
* Split an input file (GRIB, BUFR etc) into chunks of roughly the same size.
|
* Split an input file (GRIB, BUFR etc) into chunks of roughly the same size.
|
||||||
* The output files are named input_01, input_02 etc. This is much faster than grib_copy/bufr_copy
|
* The output files are named input_001, input_002 etc. This is much faster than grib_copy/bufr_copy
|
||||||
*
|
*
|
||||||
|
* 2019-07-26 W.Qu Allow an input file to be split into individual messages, one per output file (if nchunks=-1)
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "grib_api_internal.h"
|
#include "grib_api_internal.h"
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
static int verbose = 0;
|
static int verbose = 0;
|
||||||
static const char* OUTPUT_FILENAME_FORMAT = "%s_%02d"; /* x_01, x_02 etc */
|
static const char* OUTPUT_FILENAME_FORMAT = "%s_%03d"; /* x_001, x_002 etc */
|
||||||
static void usage(const char* prog)
|
static void usage(const char* prog)
|
||||||
{
|
{
|
||||||
printf("Usage: %s [-v] nchunks infile\n",prog);
|
printf("Usage: %s [-v] nchunks infile\n",prog);
|
||||||
|
printf("nchunks=-1, split infile into individual grib/bufr message\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -30,8 +33,7 @@ static int split_file(FILE* in, const char* filename, const int nchunks, unsigne
|
||||||
{
|
{
|
||||||
void* mesg=NULL;
|
void* mesg=NULL;
|
||||||
FILE* out;
|
FILE* out;
|
||||||
off_t insize=0;
|
size_t size=0,read_size=0,insize=0,chunk_size, msg_size=0, num_msg=0;
|
||||||
size_t size=0,read_size=0,chunk_size;
|
|
||||||
off_t offset=0;
|
off_t offset=0;
|
||||||
int err=GRIB_SUCCESS;
|
int err=GRIB_SUCCESS;
|
||||||
int i;
|
int i;
|
||||||
|
@ -45,11 +47,13 @@ static int split_file(FILE* in, const char* filename, const int nchunks, unsigne
|
||||||
|
|
||||||
fseeko(in, 0, SEEK_END);
|
fseeko(in, 0, SEEK_END);
|
||||||
insize = ftello(in);
|
insize = ftello(in);
|
||||||
if (insize==-1)
|
|
||||||
return 1;
|
|
||||||
fseeko(in, 0, SEEK_SET);
|
fseeko(in, 0, SEEK_SET);
|
||||||
assert(nchunks > 0);
|
if(nchunks == -1){
|
||||||
chunk_size=insize/nchunks;
|
chunk_size = size;
|
||||||
|
}else{
|
||||||
|
assert(nchunks > 0);
|
||||||
|
chunk_size=insize/nchunks;
|
||||||
|
}
|
||||||
|
|
||||||
i=1;
|
i=1;
|
||||||
sprintf(ofilename, OUTPUT_FILENAME_FORMAT, filename, i);
|
sprintf(ofilename, OUTPUT_FILENAME_FORMAT, filename, i);
|
||||||
|
@ -62,8 +66,10 @@ static int split_file(FILE* in, const char* filename, const int nchunks, unsigne
|
||||||
|
|
||||||
while ( err!=GRIB_END_OF_FILE ) {
|
while ( err!=GRIB_END_OF_FILE ) {
|
||||||
mesg=wmo_read_any_from_file_malloc(in, 0, &size, &offset, &err);
|
mesg=wmo_read_any_from_file_malloc(in, 0, &size, &offset, &err);
|
||||||
if (mesg!=NULL && err==0) {
|
num_msg++;
|
||||||
if (fwrite(mesg,1,size,out)!=size) {
|
/*printf("=1=%d\t%d\t%d\n",*count,size,insize);*/
|
||||||
|
if ( mesg!=NULL && err==0 ) {
|
||||||
|
if (fwrite(mesg,1,size,out)!=size ) {
|
||||||
perror(ofilename);
|
perror(ofilename);
|
||||||
free(ofilename);
|
free(ofilename);
|
||||||
fclose(out);
|
fclose(out);
|
||||||
|
@ -71,11 +77,13 @@ static int split_file(FILE* in, const char* filename, const int nchunks, unsigne
|
||||||
}
|
}
|
||||||
grib_context_free(c,mesg);
|
grib_context_free(c,mesg);
|
||||||
read_size+=size;
|
read_size+=size;
|
||||||
if (read_size>chunk_size) {
|
msg_size+=size;
|
||||||
if (verbose) printf("Wrote output file %s\n", ofilename);
|
if (read_size>chunk_size && msg_size < insize) {
|
||||||
|
if (verbose) printf("Wrote output file %s (%d msgs)\n", ofilename, num_msg);
|
||||||
fclose(out);
|
fclose(out);
|
||||||
i++;
|
i++;
|
||||||
/* Start writing to the next file */
|
/* Start writing to the next file */
|
||||||
|
/*printf("=2=%d\t%d\n",*count,msg_size);*/
|
||||||
sprintf(ofilename, OUTPUT_FILENAME_FORMAT, filename, i);
|
sprintf(ofilename, OUTPUT_FILENAME_FORMAT, filename, i);
|
||||||
out=fopen(ofilename,"w");
|
out=fopen(ofilename,"w");
|
||||||
if (!out) {
|
if (!out) {
|
||||||
|
@ -84,11 +92,12 @@ static int split_file(FILE* in, const char* filename, const int nchunks, unsigne
|
||||||
return GRIB_IO_PROBLEM;
|
return GRIB_IO_PROBLEM;
|
||||||
}
|
}
|
||||||
read_size=0;
|
read_size=0;
|
||||||
|
num_msg=0;
|
||||||
}
|
}
|
||||||
(*count)++;
|
(*count)++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (verbose) printf("Wrote output file %s\n", ofilename);
|
if (verbose) printf("Wrote output file %s (%d msgs)\n", ofilename,num_msg-1);
|
||||||
fclose(out);
|
fclose(out);
|
||||||
free(ofilename);
|
free(ofilename);
|
||||||
|
|
||||||
|
@ -117,8 +126,8 @@ int main(int argc,char* argv[])
|
||||||
|
|
||||||
/* add some error checking */
|
/* add some error checking */
|
||||||
nchunks=atoi(argv[i]);
|
nchunks=atoi(argv[i]);
|
||||||
if (nchunks<1) {
|
if (nchunks<1 && nchunks!=-1) {
|
||||||
fprintf(stderr,"ERROR: Invalid number %d. Please specify a positive integer.\n", nchunks);
|
fprintf(stderr,"ERROR: Invalid number %d. Please specify a positive integer. or -1 for spliting each message\n", nchunks);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue