From 0a6ce6ea3f8c05dd0a8dcf2a5f3c0d2f28ef2903 Mon Sep 17 00:00:00 2001 From: Shahram Najm Date: Sat, 25 Jul 2020 13:13:31 +0100 Subject: [PATCH 1/3] ECC-1137: BUFR performance: use fixed-length strings for descriptor name and units (Part 1) --- src/grib_accessor_class_bufr_elements_table.c | 6 +++-- ...grib_accessor_class_expanded_descriptors.c | 4 ++-- src/grib_api_internal.h | 4 ++-- src/grib_bufr_descriptor.c | 22 ++++++++++--------- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/grib_accessor_class_bufr_elements_table.c b/src/grib_accessor_class_bufr_elements_table.c index 615df7350..2a1461648 100644 --- a/src/grib_accessor_class_bufr_elements_table.c +++ b/src/grib_accessor_class_bufr_elements_table.c @@ -354,10 +354,12 @@ static int bufr_get_from_table(grib_accessor* a, bufr_descriptor* v) if (!list) return GRIB_NOT_FOUND; - v->shortName = grib_context_strdup(a->context, list[1]); + strcpy(v->shortName, list[1]); + DebugAssert( strlen(v->shortName) < 128 ); v->type = convert_type(list[2]); /* v->name=grib_context_strdup(c,list[3]); See ECC-489 */ - v->units = grib_context_strdup(a->context, list[4]); + strcpy(v->units, list[4]); + DebugAssert( strlen(v->units) < 64 ); /* ECC-985: Scale and reference are often 0 so we can reduce calls to atol */ v->scale = atol_fast(list[5]); diff --git a/src/grib_accessor_class_expanded_descriptors.c b/src/grib_accessor_class_expanded_descriptors.c index 14e320ff8..fa5c72a0c 100644 --- a/src/grib_accessor_class_expanded_descriptors.c +++ b/src/grib_accessor_class_expanded_descriptors.c @@ -399,9 +399,9 @@ static void __expand(grib_accessor* a, bufr_descriptors_array* unexpanded, bufr_ bufr_descriptor* au = grib_bufr_descriptor_new(self->tablesAccessor, 999999, err); au->width = ccp->associatedFieldWidth; grib_bufr_descriptor_set_scale(au, 0); - au->shortName = grib_context_strdup(c, "associatedField"); + strcpy(au->shortName, "associatedField"); /* au->name=grib_context_strdup(c,"associated field"); See ECC-489 */ - au->units = grib_context_strdup(c, "associated units"); + strcpy(au->units, "associated units"); #if MYDEBUG for (idepth = 0; idepth < global_depth; idepth++) printf("\t"); diff --git a/src/grib_api_internal.h b/src/grib_api_internal.h index d9f4d3b87..ec6f3338f 100644 --- a/src/grib_api_internal.h +++ b/src/grib_api_internal.h @@ -820,8 +820,8 @@ struct bufr_descriptor int Y; int type; /*char* name; Not needed: All usage commented out. See ECC-489 */ - char* shortName; - char* units; + char shortName[128]; + char units[64]; long scale; double factor; long reference; diff --git a/src/grib_bufr_descriptor.c b/src/grib_bufr_descriptor.c index 5a2549b5b..1c5bdc919 100644 --- a/src/grib_bufr_descriptor.c +++ b/src/grib_bufr_descriptor.c @@ -35,8 +35,10 @@ bufr_descriptor* grib_bufr_descriptor_clone(bufr_descriptor* d) cd->X = d->X; cd->Y = d->Y; /* cd->name=grib_context_strdup(d->context,d->name); See ECC-489 */ - cd->shortName = grib_context_strdup(d->context, d->shortName); - cd->units = grib_context_strdup(d->context, d->units); + DebugAssert( strlen(d->shortName) < 128 ); + strcpy(cd->shortName, d->shortName); + DebugAssert( strlen(d->units) < 64 ); + strcpy(cd->units, d->units); cd->scale = d->scale; cd->factor = d->factor; cd->width = d->width; @@ -50,13 +52,13 @@ bufr_descriptor* grib_bufr_descriptor_clone(bufr_descriptor* d) int grib_bufr_descriptor_set_code(grib_accessor* tables_accessor, int code, bufr_descriptor* v) { int err = 0; - grib_context* c; + //grib_context* c; bufr_descriptor* d; if (!v) return GRIB_NULL_POINTER; - c = v->context; + //c = v->context; if (v->type == BUFR_DESCRIPTOR_TYPE_REPLICATION || v->type == BUFR_DESCRIPTOR_TYPE_OPERATOR) { v->code = code; @@ -74,10 +76,12 @@ int grib_bufr_descriptor_set_code(grib_accessor* tables_accessor, int code, bufr v->Y = d->Y; /* grib_context_free(c,v->name); See ECC-489 */ /* v->name=grib_context_strdup(c,d->name); See ECC-489 */ - grib_context_free(c, v->shortName); - v->shortName = grib_context_strdup(c, d->shortName); - grib_context_free(c, v->units); - v->units = grib_context_strdup(c, d->units); + + DebugAssert( strlen(d->shortName) < 128 ); + strcpy(v->shortName,d->shortName); + DebugAssert( strlen(d->units) < 64 ); + strcpy(v->units,d->units); + v->scale = d->scale; v->factor = d->factor; v->width = d->width; @@ -130,7 +134,5 @@ void grib_bufr_descriptor_delete(bufr_descriptor* v) c = v->context; /* grib_context_free(c,v->name); See ECC-489 */ - grib_context_free(c, v->shortName); - grib_context_free(c, v->units); grib_context_free(c, v); } From 0bfa65f9bfb4d185ddfc354f95346f79ad175a64 Mon Sep 17 00:00:00 2001 From: Shahram Najm Date: Sat, 25 Jul 2020 21:16:23 +0100 Subject: [PATCH 2/3] ECC-1137: BUFR performance: use fixed-length strings for descriptor name and units (Part 2) --- src/grib_accessor_class_bufr_elements_table.c | 6 +- src/grib_api_internal.h | 2 +- src/grib_bufr_descriptor.c | 7 --- tests/CMakeLists.txt | 2 + tests/bufr_check_descriptors.c | 59 +++++++++++++++++++ tests/bufr_check_descriptors.sh | 16 +++++ 6 files changed, 82 insertions(+), 10 deletions(-) create mode 100644 tests/bufr_check_descriptors.c create mode 100755 tests/bufr_check_descriptors.sh diff --git a/src/grib_accessor_class_bufr_elements_table.c b/src/grib_accessor_class_bufr_elements_table.c index 2a1461648..46b0948cc 100644 --- a/src/grib_accessor_class_bufr_elements_table.c +++ b/src/grib_accessor_class_bufr_elements_table.c @@ -343,6 +343,8 @@ static int bufr_get_from_table(grib_accessor* a, bufr_descriptor* v) int ret = 0; char** list = 0; char code[7] = { 0 }; + const size_t maxlen_shortName = sizeof(v->shortName); + const size_t maxlen_units = sizeof(v->units); grib_trie* table = load_bufr_elements_table(a, &ret); if (ret) @@ -354,12 +356,12 @@ static int bufr_get_from_table(grib_accessor* a, bufr_descriptor* v) if (!list) return GRIB_NOT_FOUND; + DebugAssert( strlen(list[1]) < maxlen_shortName ); strcpy(v->shortName, list[1]); - DebugAssert( strlen(v->shortName) < 128 ); v->type = convert_type(list[2]); /* v->name=grib_context_strdup(c,list[3]); See ECC-489 */ + DebugAssert( strlen(list[4]) < maxlen_units ); strcpy(v->units, list[4]); - DebugAssert( strlen(v->units) < 64 ); /* ECC-985: Scale and reference are often 0 so we can reduce calls to atol */ v->scale = atol_fast(list[5]); diff --git a/src/grib_api_internal.h b/src/grib_api_internal.h index ec6f3338f..1bce88538 100644 --- a/src/grib_api_internal.h +++ b/src/grib_api_internal.h @@ -821,7 +821,7 @@ struct bufr_descriptor int type; /*char* name; Not needed: All usage commented out. See ECC-489 */ char shortName[128]; - char units[64]; + char units[128]; long scale; double factor; long reference; diff --git a/src/grib_bufr_descriptor.c b/src/grib_bufr_descriptor.c index 1c5bdc919..9518786e5 100644 --- a/src/grib_bufr_descriptor.c +++ b/src/grib_bufr_descriptor.c @@ -35,9 +35,7 @@ bufr_descriptor* grib_bufr_descriptor_clone(bufr_descriptor* d) cd->X = d->X; cd->Y = d->Y; /* cd->name=grib_context_strdup(d->context,d->name); See ECC-489 */ - DebugAssert( strlen(d->shortName) < 128 ); strcpy(cd->shortName, d->shortName); - DebugAssert( strlen(d->units) < 64 ); strcpy(cd->units, d->units); cd->scale = d->scale; cd->factor = d->factor; @@ -52,14 +50,11 @@ bufr_descriptor* grib_bufr_descriptor_clone(bufr_descriptor* d) int grib_bufr_descriptor_set_code(grib_accessor* tables_accessor, int code, bufr_descriptor* v) { int err = 0; - //grib_context* c; bufr_descriptor* d; if (!v) return GRIB_NULL_POINTER; - //c = v->context; - if (v->type == BUFR_DESCRIPTOR_TYPE_REPLICATION || v->type == BUFR_DESCRIPTOR_TYPE_OPERATOR) { v->code = code; v->F = code / 100000; @@ -77,9 +72,7 @@ int grib_bufr_descriptor_set_code(grib_accessor* tables_accessor, int code, bufr /* grib_context_free(c,v->name); See ECC-489 */ /* v->name=grib_context_strdup(c,d->name); See ECC-489 */ - DebugAssert( strlen(d->shortName) < 128 ); strcpy(v->shortName,d->shortName); - DebugAssert( strlen(d->units) < 64 ); strcpy(v->units,d->units); v->scale = d->scale; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 5e7a8525c..7ed321c17 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -30,6 +30,7 @@ list( APPEND test_bins bufr_ecc-517 bufr_get_element bufr_extract_headers + bufr_check_descriptors grib_sh_ieee64 ieee grib_sh_imag @@ -52,6 +53,7 @@ list(APPEND tests_no_data_reqd julian grib_dump_samples bufr_dump_samples + bufr_check_descriptors definitions grib_calendar grib_md5 diff --git a/tests/bufr_check_descriptors.c b/tests/bufr_check_descriptors.c new file mode 100644 index 000000000..6ac616f82 --- /dev/null +++ b/tests/bufr_check_descriptors.c @@ -0,0 +1,59 @@ +/* + * (C) Copyright 2005- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * + * In applying this licence, ECMWF does not waive the privileges and immunities granted to it by + * virtue of its status as an intergovernmental organisation nor does it submit to any jurisdiction. + */ + +#include +#include "grib_api_internal.h" + +#define MAX_LEN_NAME 128 +#define MAX_LEN_UNIT 128 + +int main(int argc, char** argv) +{ + char* filename = NULL; + FILE* fp = NULL; + char line[1024] = {0,}; + char** list = NULL; + size_t i = 0, line_number = 0; + char* str_key = NULL; + char* str_units = NULL; + + Assert(argc == 2); + + filename = argv[1]; + fp = fopen(filename, "r"); + Assert(fp); + + while (fgets(line, sizeof(line) - 1, fp)) { + ++line_number; + Assert(strlen(line) > 0); + if (line[0] == '#') continue; /* Ignore first line with column titles */ + list = string_split(line, "|"); + Assert(list); + str_key = list[1]; + str_units = list[4]; + if (strlen(str_key) >= MAX_LEN_NAME) { + fprintf(stderr, "Error on line %lu: bufr_descriptor key name '%s' exceeds %d characters.\n", + line_number, str_key, MAX_LEN_NAME); + return 1; + } + if (strlen(str_units) >= MAX_LEN_UNIT) { + fprintf(stderr, "Error on line %lu: bufr_descriptor units '%s' exceeds %d characters.\n", + line_number, str_units, MAX_LEN_UNIT); + return 1; + } + + for (i = 0; list[i] != NULL; ++i) free(list[i]); + free(list); + } + + fclose(fp); + + return 0; +} diff --git a/tests/bufr_check_descriptors.sh b/tests/bufr_check_descriptors.sh new file mode 100755 index 000000000..26274afe4 --- /dev/null +++ b/tests/bufr_check_descriptors.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# (C) Copyright 2005- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# +# In applying this licence, ECMWF does not waive the privileges and immunities granted to it by +# virtue of its status as an intergovernmental organisation nor does it submit to any jurisdiction. +# + +. ./include.sh + +for file in `find ${ECCODES_DEFINITION_PATH}/bufr/ -name 'element.table' -print` +do + ${test_dir}/bufr_check_descriptors $file +done From ad1881a3752bdeeee093196afa05f397109f3b76 Mon Sep 17 00:00:00 2001 From: Shahram Najm Date: Sat, 25 Jul 2020 21:22:44 +0100 Subject: [PATCH 3/3] ECC-1137: Test --- tests/bufr_check_descriptors.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/bufr_check_descriptors.c b/tests/bufr_check_descriptors.c index 6ac616f82..042af5c6c 100644 --- a/tests/bufr_check_descriptors.c +++ b/tests/bufr_check_descriptors.c @@ -11,9 +11,6 @@ #include #include "grib_api_internal.h" -#define MAX_LEN_NAME 128 -#define MAX_LEN_UNIT 128 - int main(int argc, char** argv) { char* filename = NULL; @@ -23,6 +20,9 @@ int main(int argc, char** argv) size_t i = 0, line_number = 0; char* str_key = NULL; char* str_units = NULL; + bufr_descriptor v = {0,}; + const size_t maxlen_keyName = sizeof(v.shortName); + const size_t maxlen_units = sizeof(v.units); Assert(argc == 2); @@ -38,14 +38,14 @@ int main(int argc, char** argv) Assert(list); str_key = list[1]; str_units = list[4]; - if (strlen(str_key) >= MAX_LEN_NAME) { - fprintf(stderr, "Error on line %lu: bufr_descriptor key name '%s' exceeds %d characters.\n", - line_number, str_key, MAX_LEN_NAME); + if (strlen(str_key) >= maxlen_keyName) { + fprintf(stderr, "Error on line %lu: bufr_descriptor key name '%s' exceeds %lu characters.\n", + line_number, str_key, maxlen_keyName); return 1; } - if (strlen(str_units) >= MAX_LEN_UNIT) { - fprintf(stderr, "Error on line %lu: bufr_descriptor units '%s' exceeds %d characters.\n", - line_number, str_units, MAX_LEN_UNIT); + if (strlen(str_units) >= maxlen_units) { + fprintf(stderr, "Error on line %lu: bufr_descriptor units '%s' exceeds %lu characters.\n", + line_number, str_units, maxlen_units); return 1; }