eccodes/tools/codes_split_file.cc

168 lines
4.6 KiB
C++
Raw Permalink Normal View History

2017-10-27 09:57:49 +00:00
/*
2020-01-28 14:32:34 +00:00
* (C) Copyright 2005- ECMWF.
2017-10-27 09:57:49 +00:00
*
* This software is licensed under the terms of the Apache Licence Version 2.0
* which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
*
* In applying this licence, ECMWF does not waive the privileges and immunities granted to it by
* virtue of its status as an intergovernmental organisation nor does it submit to any jurisdiction.
*/
2017-12-01 13:28:57 +00:00
/*
* Description:
* Split an input file (GRIB, BUFR etc) into chunks of roughly the same size.
* The output files are named input_001, input_002 etc. This is much faster than grib_copy/bufr_copy
2022-12-07 19:38:35 +00:00
*
* 2019-07-26 W.Qu Allow an input file to be split into each individual message (if nchunk=-1)
2017-12-01 13:28:57 +00:00
*
*/
2017-10-27 09:57:49 +00:00
#include "grib_api_internal.h"
#include <assert.h>
#include <errno.h>
#include <limits.h>
2017-10-27 09:57:49 +00:00
2024-02-14 12:07:42 +00:00
/* When true (set by the -v command-line option), progress information is printed to stdout */
static bool verbose = false;
/* printf-style pattern for output file names: the input file name, an underscore and
 * the chunk index zero-padded to at least three digits */
static const char* OUTPUT_FILENAME_FORMAT = "%s_%03d"; /* x_001, x_002 etc */
2017-10-27 09:57:49 +00:00
/*
 * Print the command-line synopsis to stdout and terminate the process
 * with exit status 1. This function does not return.
 *
 * prog: program name shown in the synopsis (callers pass argv[0])
 */
static void usage(const char* prog)
{
    printf("Usage: %s [-v] nchunks infile\n"
           "Setting nchunks=-1 splits infile into individual messages\n",
           prog);
    exit(1);
}
2024-02-14 12:07:42 +00:00
/*
 * Report on stdout how many messages were written to one output file.
 *
 * output_filename: name of the output file being reported
 * n:               number of messages written to it
 */
static void print_num_messages_written(const char* output_filename, size_t n)
{
    /* "1 message" but "0 messages", "2 messages", ... */
    const char* plural_suffix = (n == 1) ? "" : "s";

    printf("Wrote output file %s (%zu message%s)\n", output_filename, n, plural_suffix);
}
2020-01-22 13:10:59 +00:00
static int split_file(FILE* in, const char* filename, const int nchunks, unsigned long* count)
2017-10-27 09:57:49 +00:00
{
2020-01-22 13:10:59 +00:00
void* mesg = NULL;
size_t size = 0, read_size = 0, insize = 0, chunk_size, msg_size = 0, num_msg = 0;
off_t offset = 0;
2023-08-27 12:31:21 +00:00
int err = GRIB_SUCCESS, i = 0;
2020-01-22 13:10:59 +00:00
grib_context* c = grib_context_get_default();
2023-08-27 12:31:21 +00:00
assert(in);
2017-10-27 09:57:49 +00:00
/* name of output file */
2023-08-27 12:31:21 +00:00
size_t ofilenameMaxLen = strlen(filename) + 10;
char* ofilename = (char*)calloc(1, ofilenameMaxLen);
2017-10-27 09:57:49 +00:00
fseeko(in, 0, SEEK_END);
insize = ftello(in);
fseeko(in, 0, SEEK_SET);
2019-08-22 13:51:49 +00:00
if (nchunks == -1) {
chunk_size = size;
2020-01-22 13:10:59 +00:00
}
else {
2019-08-22 13:51:49 +00:00
assert(nchunks > 0);
2020-01-22 13:10:59 +00:00
chunk_size = insize / nchunks;
}
2017-10-27 09:57:49 +00:00
2020-01-22 13:10:59 +00:00
i = 1;
snprintf(ofilename, ofilenameMaxLen, OUTPUT_FILENAME_FORMAT, filename, i);
2023-08-27 12:31:21 +00:00
FILE* out = fopen(ofilename, "w");
2017-10-27 09:57:49 +00:00
if (!out) {
perror(ofilename);
free(ofilename);
return GRIB_IO_PROBLEM;
2017-10-27 09:57:49 +00:00
}
2020-01-22 13:10:59 +00:00
while (err != GRIB_END_OF_FILE) {
mesg = wmo_read_any_from_file_malloc(in, 0, &size, &offset, &err);
num_msg++;
2020-01-22 13:10:59 +00:00
if (mesg != NULL && err == 0) {
if (fwrite(mesg, 1, size, out) != size) {
perror(ofilename);
free(ofilename);
fclose(out);
return GRIB_IO_PROBLEM;
2017-10-27 09:57:49 +00:00
}
2020-01-22 13:10:59 +00:00
grib_context_free(c, mesg);
read_size += size;
msg_size += size;
if (read_size > chunk_size && msg_size < insize) {
2024-02-14 12:07:42 +00:00
if (verbose) {
print_num_messages_written(ofilename, num_msg);
}
fclose(out);
i++;
/* Start writing to the next file */
/*printf("=2=%d\t%d\n",*count,msg_size);*/
snprintf(ofilename, ofilenameMaxLen, OUTPUT_FILENAME_FORMAT, filename, i);
2020-01-22 13:10:59 +00:00
out = fopen(ofilename, "w");
if (!out) {
perror(ofilename);
free(ofilename);
return GRIB_IO_PROBLEM;
}
2020-01-22 13:10:59 +00:00
read_size = 0;
num_msg = 0;
}
(*count)++;
2017-10-27 09:57:49 +00:00
}
}
2024-02-14 12:07:42 +00:00
if (verbose) {
print_num_messages_written(ofilename, num_msg - 1);
}
2017-10-27 09:57:49 +00:00
fclose(out);
2017-10-27 10:20:09 +00:00
free(ofilename);
2017-10-27 09:57:49 +00:00
2020-01-22 13:10:59 +00:00
if (err == GRIB_END_OF_FILE)
err = GRIB_SUCCESS;
2017-10-27 09:57:49 +00:00
return err;
}
2020-01-22 13:10:59 +00:00
int main(int argc, char* argv[])
2017-10-27 09:57:49 +00:00
{
2020-01-22 13:10:59 +00:00
int i, status = 0;
int err = 0, nchunks = 0;
unsigned long count = 0;
2017-10-27 09:57:49 +00:00
2020-01-22 13:10:59 +00:00
if (argc < 3)
usage(argv[0]);
2017-10-27 09:57:49 +00:00
2020-01-22 13:10:59 +00:00
i = 1;
if (strcmp(argv[i], "-v") == 0) {
i++;
2024-02-14 12:07:42 +00:00
verbose = true;
2020-01-22 13:10:59 +00:00
if (argc != 4)
usage(argv[0]);
2017-10-27 09:57:49 +00:00
}
/* add some error checking */
2020-01-22 13:10:59 +00:00
nchunks = atoi(argv[i]);
if (nchunks < 1 && nchunks != -1) {
fprintf(stderr, "ERROR: Invalid number %d. Please specify a positive integer or -1\n", nchunks);
return 1;
}
2017-10-27 09:57:49 +00:00
i++;
2023-08-27 12:31:21 +00:00
const char* filename = argv[i];
if (path_is_directory(filename)) {
fprintf(stderr, "ERROR: %s: Is a directory\n", filename);
return 1;
}
2023-08-27 12:31:21 +00:00
FILE* infh = fopen(filename, "rb");
2017-10-27 09:57:49 +00:00
if (!infh) {
perror(filename);
return 1;
2017-10-27 09:57:49 +00:00
}
2020-01-22 13:10:59 +00:00
count = 0;
err = split_file(infh, filename, nchunks, &count);
2017-10-27 09:57:49 +00:00
if (err) {
2020-01-22 13:10:59 +00:00
fprintf(stderr, "ERROR: Failed to split file %s", filename);
fprintf(stderr, "\n");
status = 1;
2020-01-22 13:10:59 +00:00
}
else {
if (verbose)
printf("%7lu %s\n", count, filename);
2017-10-27 09:57:49 +00:00
}
fclose(infh);
return status;
2017-10-27 09:57:49 +00:00
}