2017-10-27 09:57:49 +00:00
|
|
|
/*
|
2020-01-28 14:32:34 +00:00
|
|
|
* (C) Copyright 2005- ECMWF.
|
2017-10-27 09:57:49 +00:00
|
|
|
*
|
|
|
|
* This software is licensed under the terms of the Apache Licence Version 2.0
|
|
|
|
* which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
|
|
*
|
|
|
|
* In applying this licence, ECMWF does not waive the privileges and immunities granted to it by
|
|
|
|
* virtue of its status as an intergovernmental organisation nor does it submit to any jurisdiction.
|
|
|
|
*/
|
|
|
|
|
2017-12-01 13:28:57 +00:00
|
|
|
/*
|
|
|
|
* Description:
|
|
|
|
* Split an input file (GRIB, BUFR etc) into chunks of roughly the same size.
|
|
|
|
* The output files are named input_001, input_002 etc. This is much faster than grib_copy/bufr_copy
|
2022-12-07 19:38:35 +00:00
|
|
|
*
|
2019-08-08 02:13:37 +00:00
|
|
|
* 2019-07-26 W.Qu Allow an input file to be split into each individual message (if nchunk=-1)
|
2017-12-01 13:28:57 +00:00
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2017-10-27 09:57:49 +00:00
|
|
|
#include "grib_api_internal.h"
#include <assert.h>
#include <errno.h>
#include <limits.h>
|
2017-10-27 09:57:49 +00:00
|
|
|
|
2024-02-14 12:07:42 +00:00
|
|
|
/* Set to true by main() when the -v option is given; gates the progress messages printed to stdout */
static bool verbose = false;
|
2019-08-08 02:13:37 +00:00
|
|
|
/* printf-style pattern for output names: the input file name, an underscore, then the chunk */
/* number zero-padded to at least three digits */
static const char* OUTPUT_FILENAME_FORMAT = "%s_%03d"; /* x_001, x_002 etc */
|
2017-10-27 09:57:49 +00:00
|
|
|
/* Print the command-line synopsis for 'prog' to stdout and terminate the process with exit status 1 */
static void usage(const char* prog)
{
    printf("Usage: %s [-v] nchunks infile\n", prog);
    fputs("Setting nchunks=-1 splits infile into individual messages\n", stdout);
    exit(1);
}
|
|
|
|
|
2024-02-14 12:07:42 +00:00
|
|
|
/* Report on stdout that 'output_filename' was written with 'n' messages ("message" is pluralised unless n is 1) */
static void print_num_messages_written(const char* output_filename, size_t n)
{
    const char* plural_suffix = "s";
    if (n == 1) {
        plural_suffix = "";
    }
    printf("Wrote output file %s (%zu message%s)\n", output_filename, n, plural_suffix);
}
|
|
|
|
|
2020-01-22 13:10:59 +00:00
|
|
|
static int split_file(FILE* in, const char* filename, const int nchunks, unsigned long* count)
|
2017-10-27 09:57:49 +00:00
|
|
|
{
|
2020-01-22 13:10:59 +00:00
|
|
|
void* mesg = NULL;
|
|
|
|
size_t size = 0, read_size = 0, insize = 0, chunk_size, msg_size = 0, num_msg = 0;
|
|
|
|
off_t offset = 0;
|
2023-08-27 12:31:21 +00:00
|
|
|
int err = GRIB_SUCCESS, i = 0;
|
2020-01-22 13:10:59 +00:00
|
|
|
grib_context* c = grib_context_get_default();
|
2023-08-27 12:31:21 +00:00
|
|
|
assert(in);
|
2017-10-27 09:57:49 +00:00
|
|
|
|
2017-10-27 18:05:50 +00:00
|
|
|
/* name of output file */
|
2023-08-27 12:31:21 +00:00
|
|
|
size_t ofilenameMaxLen = strlen(filename) + 10;
|
|
|
|
char* ofilename = (char*)calloc(1, ofilenameMaxLen);
|
2017-10-27 09:57:49 +00:00
|
|
|
|
|
|
|
fseeko(in, 0, SEEK_END);
|
|
|
|
insize = ftello(in);
|
|
|
|
fseeko(in, 0, SEEK_SET);
|
2019-08-22 13:51:49 +00:00
|
|
|
if (nchunks == -1) {
|
|
|
|
chunk_size = size;
|
2020-01-22 13:10:59 +00:00
|
|
|
}
|
|
|
|
else {
|
2019-08-22 13:51:49 +00:00
|
|
|
assert(nchunks > 0);
|
2020-01-22 13:10:59 +00:00
|
|
|
chunk_size = insize / nchunks;
|
2019-08-08 02:13:37 +00:00
|
|
|
}
|
2017-10-27 09:57:49 +00:00
|
|
|
|
2020-01-22 13:10:59 +00:00
|
|
|
i = 1;
|
2022-11-10 19:18:43 +00:00
|
|
|
snprintf(ofilename, ofilenameMaxLen, OUTPUT_FILENAME_FORMAT, filename, i);
|
2023-08-27 12:31:21 +00:00
|
|
|
FILE* out = fopen(ofilename, "w");
|
2017-10-27 09:57:49 +00:00
|
|
|
if (!out) {
|
2017-10-27 18:05:50 +00:00
|
|
|
perror(ofilename);
|
|
|
|
free(ofilename);
|
|
|
|
return GRIB_IO_PROBLEM;
|
2017-10-27 09:57:49 +00:00
|
|
|
}
|
|
|
|
|
2020-01-22 13:10:59 +00:00
|
|
|
while (err != GRIB_END_OF_FILE) {
|
|
|
|
mesg = wmo_read_any_from_file_malloc(in, 0, &size, &offset, &err);
|
2019-08-08 02:13:37 +00:00
|
|
|
num_msg++;
|
2020-01-22 13:10:59 +00:00
|
|
|
if (mesg != NULL && err == 0) {
|
|
|
|
if (fwrite(mesg, 1, size, out) != size) {
|
2017-10-27 18:05:50 +00:00
|
|
|
perror(ofilename);
|
|
|
|
free(ofilename);
|
|
|
|
fclose(out);
|
|
|
|
return GRIB_IO_PROBLEM;
|
2017-10-27 09:57:49 +00:00
|
|
|
}
|
2020-01-22 13:10:59 +00:00
|
|
|
grib_context_free(c, mesg);
|
|
|
|
read_size += size;
|
|
|
|
msg_size += size;
|
|
|
|
if (read_size > chunk_size && msg_size < insize) {
|
2024-02-14 12:07:42 +00:00
|
|
|
if (verbose) {
|
|
|
|
print_num_messages_written(ofilename, num_msg);
|
|
|
|
}
|
2017-10-27 18:05:50 +00:00
|
|
|
fclose(out);
|
|
|
|
i++;
|
|
|
|
/* Start writing to the next file */
|
2019-08-08 02:13:37 +00:00
|
|
|
/*printf("=2=%d\t%d\n",*count,msg_size);*/
|
2022-11-10 19:18:43 +00:00
|
|
|
snprintf(ofilename, ofilenameMaxLen, OUTPUT_FILENAME_FORMAT, filename, i);
|
2020-01-22 13:10:59 +00:00
|
|
|
out = fopen(ofilename, "w");
|
2017-10-27 18:05:50 +00:00
|
|
|
if (!out) {
|
|
|
|
perror(ofilename);
|
|
|
|
free(ofilename);
|
|
|
|
return GRIB_IO_PROBLEM;
|
|
|
|
}
|
2020-01-22 13:10:59 +00:00
|
|
|
read_size = 0;
|
|
|
|
num_msg = 0;
|
2017-10-27 18:05:50 +00:00
|
|
|
}
|
|
|
|
(*count)++;
|
2017-10-27 09:57:49 +00:00
|
|
|
}
|
|
|
|
}
|
2024-02-14 12:07:42 +00:00
|
|
|
if (verbose) {
|
|
|
|
print_num_messages_written(ofilename, num_msg - 1);
|
|
|
|
}
|
2017-10-27 09:57:49 +00:00
|
|
|
fclose(out);
|
2017-10-27 10:20:09 +00:00
|
|
|
free(ofilename);
|
2017-10-27 09:57:49 +00:00
|
|
|
|
2020-01-22 13:10:59 +00:00
|
|
|
if (err == GRIB_END_OF_FILE)
|
|
|
|
err = GRIB_SUCCESS;
|
2017-10-27 09:57:49 +00:00
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2020-01-22 13:10:59 +00:00
|
|
|
int main(int argc, char* argv[])
|
2017-10-27 09:57:49 +00:00
|
|
|
{
|
2020-01-22 13:10:59 +00:00
|
|
|
int i, status = 0;
|
|
|
|
int err = 0, nchunks = 0;
|
|
|
|
unsigned long count = 0;
|
2017-10-27 09:57:49 +00:00
|
|
|
|
2020-01-22 13:10:59 +00:00
|
|
|
if (argc < 3)
|
|
|
|
usage(argv[0]);
|
2017-10-27 09:57:49 +00:00
|
|
|
|
2020-01-22 13:10:59 +00:00
|
|
|
i = 1;
|
|
|
|
if (strcmp(argv[i], "-v") == 0) {
|
2017-10-27 18:05:50 +00:00
|
|
|
i++;
|
2024-02-14 12:07:42 +00:00
|
|
|
verbose = true;
|
2020-01-22 13:10:59 +00:00
|
|
|
if (argc != 4)
|
|
|
|
usage(argv[0]);
|
2017-10-27 09:57:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* add some error checking */
|
2020-01-22 13:10:59 +00:00
|
|
|
nchunks = atoi(argv[i]);
|
|
|
|
if (nchunks < 1 && nchunks != -1) {
|
|
|
|
fprintf(stderr, "ERROR: Invalid number %d. Please specify a positive integer or -1\n", nchunks);
|
2017-10-27 18:05:50 +00:00
|
|
|
return 1;
|
|
|
|
}
|
2017-10-27 09:57:49 +00:00
|
|
|
|
|
|
|
i++;
|
2023-08-27 12:31:21 +00:00
|
|
|
const char* filename = argv[i];
|
2019-12-27 11:38:59 +00:00
|
|
|
if (path_is_directory(filename)) {
|
|
|
|
fprintf(stderr, "ERROR: %s: Is a directory\n", filename);
|
|
|
|
return 1;
|
2017-10-27 18:05:50 +00:00
|
|
|
}
|
2023-08-27 12:31:21 +00:00
|
|
|
FILE* infh = fopen(filename, "rb");
|
2017-10-27 09:57:49 +00:00
|
|
|
if (!infh) {
|
2017-10-27 18:05:50 +00:00
|
|
|
perror(filename);
|
|
|
|
return 1;
|
2017-10-27 09:57:49 +00:00
|
|
|
}
|
|
|
|
|
2020-01-22 13:10:59 +00:00
|
|
|
count = 0;
|
|
|
|
err = split_file(infh, filename, nchunks, &count);
|
2017-10-27 09:57:49 +00:00
|
|
|
if (err) {
|
2020-01-22 13:10:59 +00:00
|
|
|
fprintf(stderr, "ERROR: Failed to split file %s", filename);
|
|
|
|
fprintf(stderr, "\n");
|
2017-10-27 18:05:50 +00:00
|
|
|
status = 1;
|
2020-01-22 13:10:59 +00:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
if (verbose)
|
|
|
|
printf("%7lu %s\n", count, filename);
|
2017-10-27 09:57:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fclose(infh);
|
|
|
|
|
2017-10-27 18:05:50 +00:00
|
|
|
return status;
|
2017-10-27 09:57:49 +00:00
|
|
|
}
|