eccodes/tools/codes_split_file.cc

170 lines
4.6 KiB
C++

/*
* (C) Copyright 2005- ECMWF.
*
* This software is licensed under the terms of the Apache Licence Version 2.0
* which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
*
* In applying this licence, ECMWF does not waive the privileges and immunities granted to it by
* virtue of its status as an intergovernmental organisation nor does it submit to any jurisdiction.
*/
/*
* Description:
* Split an input file (GRIB, BUFR etc) into chunks of roughly the same size.
* The output files are named input_001, input_002 etc. This is much faster than grib_copy/bufr_copy
*
* 2019-07-26 W.Qu Allow an input file to be split into its individual messages (if nchunks=-1)
*
*/
#include "grib_api_internal.h"
#include <assert.h>
#include <errno.h>
#include <limits.h>
/* Set to 1 by the -v command line option; enables the progress messages written to stdout */
static int verbose = 0;
/* printf-style pattern for output file names: input file name, underscore, zero-padded chunk index */
static const char* OUTPUT_FILENAME_FORMAT = "%s_%03d"; /* x_001, x_002 etc */
/* Show the command line synopsis on stdout, then terminate the process with exit status 1. */
static void usage(const char* prog)
{
    printf("Usage: %s [-v] nchunks infile\n", prog);
    fputs("Setting nchunks=-1 splits infile into individual messages\n", stdout);
    exit(1);
}
/*
 * Copy the messages of 'in' into a sequence of numbered output files.
 *
 *   in       : input stream opened by the caller; it is rewound here but not closed
 *   filename : name of the input; output names are built from it with OUTPUT_FILENAME_FORMAT
 *   nchunks  : number of output files to aim for (> 0), or -1 for one file per message
 *   count    : incremented once for every message written
 *
 * Messages are never cut in half: a new output file is started only after a whole
 * message has been written and the bytes in the current file exceed the chunk size.
 * A read that reports an error other than end-of-file is skipped and reading continues.
 *
 * Returns GRIB_SUCCESS once end-of-file is reached, 1 if 'in' is NULL, and
 * GRIB_IO_PROBLEM if the input size cannot be determined, the output name cannot be
 * allocated, or an output file cannot be opened, written or closed.
 */
static int split_file(FILE* in, const char* filename, const int nchunks, unsigned long* count)
{
    void* mesg      = NULL;
    FILE* out       = NULL;
    char* ofilename = NULL;
    size_t size = 0, read_size = 0, insize = 0, chunk_size = 0, msg_size = 0, num_msg = 0;
    size_t ofilenameMaxLen = 0;
    off_t offset = 0, end_pos = 0;
    int err = GRIB_SUCCESS;
    int i   = 1;
    grib_context* c = grib_context_get_default();

    if (!in)
        return 1;

    /* Total size of the input; a failure here would otherwise yield a bogus chunk size */
    if (fseeko(in, 0, SEEK_END) != 0 || (end_pos = ftello(in)) < 0 || fseeko(in, 0, SEEK_SET) != 0) {
        perror(filename);
        return GRIB_IO_PROBLEM;
    }
    insize = (size_t)end_pos;

    if (nchunks == -1) {
        /* Any message is larger than 0 bytes, so each one ends up in its own file */
        chunk_size = 0;
    }
    else {
        assert(nchunks > 0);
        chunk_size = insize / nchunks;
    }

    /* Name of output file: room for '_', the sign and digits of any int, and the terminator */
    ofilenameMaxLen = strlen(filename) + 16;
    ofilename       = (char*)calloc(1, ofilenameMaxLen);
    if (!ofilename) {
        fprintf(stderr, "ERROR: %s: Failed to allocate memory for output file name\n", filename);
        return GRIB_IO_PROBLEM;
    }

    snprintf(ofilename, ofilenameMaxLen, OUTPUT_FILENAME_FORMAT, filename, i);
    out = fopen(ofilename, "wb"); /* binary mode: the messages must be copied byte for byte */
    if (!out) {
        perror(ofilename);
        free(ofilename);
        return GRIB_IO_PROBLEM;
    }

    while (err != GRIB_END_OF_FILE) {
        mesg = wmo_read_any_from_file_malloc(in, 0, &size, &offset, &err);
        num_msg++; /* also counts the final end-of-file read; compensated for below */
        if (mesg != NULL && err == 0) {
            if (fwrite(mesg, 1, size, out) != size) {
                perror(ofilename);
                grib_context_free(c, mesg);
                fclose(out);
                free(ofilename);
                return GRIB_IO_PROBLEM;
            }
            grib_context_free(c, mesg);
            read_size += size; /* bytes in the current output file */
            msg_size += size;  /* bytes written overall */

            /* Chunk is full; the second test avoids opening an empty file after the last message */
            if (read_size > chunk_size && msg_size < insize) {
                if (fclose(out) != 0) {
                    perror(ofilename);
                    free(ofilename);
                    return GRIB_IO_PROBLEM;
                }
                if (verbose)
                    printf("Wrote output file %s (%zu msgs)\n", ofilename, num_msg);
                i++;
                /* Start writing to the next file */
                snprintf(ofilename, ofilenameMaxLen, OUTPUT_FILENAME_FORMAT, filename, i);
                out = fopen(ofilename, "wb");
                if (!out) {
                    perror(ofilename);
                    free(ofilename);
                    return GRIB_IO_PROBLEM;
                }
                read_size = 0;
                num_msg   = 0;
            }
            (*count)++;
        }
    }

    /* Closing flushes buffered data, so a failure here means the last file is incomplete */
    if (fclose(out) != 0) {
        perror(ofilename);
        free(ofilename);
        return GRIB_IO_PROBLEM;
    }
    if (verbose)
        printf("Wrote output file %s (%zu msgs)\n", ofilename, num_msg - 1);
    free(ofilename);

    if (err == GRIB_END_OF_FILE)
        err = GRIB_SUCCESS;
    return err;
}
/*
 * Entry point. Command line: [-v] nchunks infile
 *
 *   -v      : optional, must be the first argument; turns on progress messages
 *   nchunks : a positive integer, or -1 to write every message to its own file
 *   infile  : path of the file to split; directories are rejected
 *
 * Returns 0 when the file was split, 1 on any error. Wrong usage terminates
 * the process through usage().
 */
int main(int argc, char* argv[])
{
    FILE* infh          = NULL;
    char* filename      = NULL;
    char* endptr        = NULL;
    long lvalue         = 0;
    int i               = 1;
    int status          = 0;
    int err             = 0;
    int nchunks         = 0;
    unsigned long count = 0;

    if (argc < 3)
        usage(argv[0]);

    if (strcmp(argv[i], "-v") == 0) {
        i++;
        verbose = 1;
        if (argc != 4)
            usage(argv[0]);
    }

    /* Parse nchunks strictly: the whole argument must be a number that fits in an int */
    errno  = 0;
    lvalue = strtol(argv[i], &endptr, 10);
    if (endptr == argv[i] || *endptr != '\0' || errno == ERANGE ||
        lvalue > INT_MAX || (lvalue < 1 && lvalue != -1)) {
        fprintf(stderr, "ERROR: Invalid number %s. Please specify a positive integer or -1\n", argv[i]);
        return 1;
    }
    nchunks = (int)lvalue;

    i++;
    filename = argv[i];
    if (path_is_directory(filename)) {
        fprintf(stderr, "ERROR: %s: Is a directory\n", filename);
        return 1;
    }
    infh = fopen(filename, "rb");
    if (!infh) {
        perror(filename);
        return 1;
    }

    err = split_file(infh, filename, nchunks, &count);
    if (err) {
        fprintf(stderr, "ERROR: Failed to split file %s\n", filename);
        status = 1;
    }
    else {
        /* Total number of messages written, followed by the input file name */
        if (verbose)
            printf("%7lu %s\n", count, filename);
    }

    fclose(infh);
    return status;
}