From 7175b9c664e30e2cda647ec8b045ecb5c7e8b3cc Mon Sep 17 00:00:00 2001 From: Shahram Najm Date: Wed, 31 Jul 2013 15:15:45 +0100 Subject: [PATCH] GRIB-153: gribapi patch to speed up encoding on IBM P6/7 --- NOTICE | 8 +- configure.ac | 13 + src/CMakeLists.txt | 45 +- src/Makefile.am | 3 +- src/encode_double_array.c | 427 +++++++++ src/grib_accessor_class_data_simple_packing.c | 809 +++++++++--------- src/grib_bits.c | 145 ++-- src/grib_bits_ibmpow.c | 148 ++++ src/grib_bits_ibmpow_opt.c | 298 +++++++ src/minmax_val.c | 332 +++++++ 10 files changed, 1732 insertions(+), 496 deletions(-) create mode 100644 src/encode_double_array.c create mode 100644 src/grib_bits_ibmpow.c create mode 100644 src/grib_bits_ibmpow_opt.c create mode 100644 src/minmax_val.c diff --git a/NOTICE b/NOTICE index f8fb39428..a35ae5131 100644 --- a/NOTICE +++ b/NOTICE @@ -1,7 +1,11 @@ ECMWF GRIB API Copyright 2005-2013 ECMWF. -This product includes software developed at ECMWF (http://www.ecmwf.int). +This product includes software developed at +ECMWF (http://www.ecmwf.int). -Parts of the definitions provided by WMO (http://www.wmo.int/pages/index_en.html) +Parts of the definitions provided by +WMO (http://www.wmo.int/pages/index_en.html) +IBM POWER optimisations were developed at +The Max-Planck-Institute for Meteorology. diff --git a/configure.ac b/configure.ac index c081c7a40..8ee88759b 100755 --- a/configure.ac +++ b/configure.ac @@ -89,6 +89,19 @@ fi AC_DEFINE_UNQUOTED(GRIB_PTHREADS,$GRIB_PTHREADS,1->pthreads enabled 0->pthreads disabled) AC_DEFINE_UNQUOTED(GRIB_LINUX_PTHREADS,$GRIB_LINUX_PTHREADS,1->pthreads enabled 0->pthreads disabled) +dnl check IBM POWER 6/7 optimisations option +AC_ARG_ENABLE([ibmpower67_opt], + [AS_HELP_STRING([--enable-ibmpower67_opt],[enable IBM POWER 6/7 optimisations [by default disabled]])], + [ibmpower67_opts=${enableval}] , [ibmpower67_opts=no] +) +if test "x${ibmpower67_opts}" = xyes; then + GRIB_IBMPOWER67_OPT=1 +else + GRIB_IBMPOWER67_OPT=0 +fi +AC_DEFINE_UNQUOTED(GRIB_IBMPOWER67_OPT,$GRIB_IBMPOWER67_OPT,1->IBM Power6/7 Optimisations enabled 0->IBM Power6/7 Optimisations disabled) + + dnl check on uppercase fortran modules not working to be fixed dnl some fortran compilers change the name of the .mod file in upper case! ac_cv_prog_f90_uppercase_mod=no diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0053f554a..dd64840e8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -302,33 +302,34 @@ list( APPEND grib_api_srcs grib_windef.h ) -# list( APPEND grib_api_extra_srcs -# grib_bits_any_endian.c -# grib_bits_any_endian_simple.c -# ) +list( APPEND grib_api_extra_srcs + grib_bits_fast_big_endian.c grib_bits_any_endian.c + grib_bits_fast_big_endian_vector.c grib_bits_any_endian_vector.c + grib_bits_fast_big_endian_simple.c grib_bits_any_endian_simple.c + grib_bits_fast_big_endian_omp.c grib_bits_any_endian_omp.c + encode_double_array.c + grib_bits_ibmpow.c + grib_bits_ibmpow_opt.c + minmax_val.c +) configure_file( grib_api_version.c.in grib_api_version.c @ONLY ) -ecbuild_generate_yy( YYPREFIX grib_yy - YACC griby - LEX gribl - DEPENDANT action.c ) +ecbuild_generate_yy(YYPREFIX grib_yy + YACC griby + LEX gribl + DEPENDANT action.c ) -ecbuild_add_library( TARGET grib_api - SOURCES - grib_api_version.c - griby.c gribl.c - ${grib_api_extra_srcs} - ${grib_api_srcs} - GENERATED - grib_api_version.c - INCLUDES - ${GRIB_API_EXTRA_INCLUDE_DIRS} - LIBS - ${GRIB_API_EXTRA_LIBRARIES} - ${CMATH_LIBRARIES} ) +ecbuild_add_library(TARGET grib_api + SOURCES grib_api_version.c + griby.c gribl.c + ${grib_api_srcs} + GENERATED grib_api_version.c + INCLUDES ${GRIB_API_EXTRA_INCLUDE_DIRS} + LIBS ${GRIB_API_EXTRA_LIBRARIES} + TEMPLATES ${grib_api_extra_srcs} + ${CMATH_LIBRARIES} ) # TODO: add support for jpg and png - install( FILES grib_api.h DESTINATION include ) diff --git a/src/Makefile.am b/src/Makefile.am index 7029f96fe..d76b170b6 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -332,7 +332,8 @@ EXTRA_DIST= dummy.am griby.y gribl.l grib_accessor_classes_hash.c \ grib_bits_fast_big_endian.c grib_bits_any_endian.c \ grib_bits_fast_big_endian_vector.c grib_bits_any_endian_vector.c \ grib_bits_fast_big_endian_simple.c grib_bits_any_endian_simple.c \ - grib_bits_fast_big_endian_omp.c grib_bits_any_endian_omp.c + grib_bits_fast_big_endian_omp.c grib_bits_any_endian_omp.c \ + encode_double_array.c grib_bits_ibmpow.c grib_bits_ibmpow_opt.c minmax_val.c include $(DEVEL_RULES) diff --git a/src/encode_double_array.c b/src/encode_double_array.c new file mode 100644 index 000000000..fa6049830 --- /dev/null +++ b/src/encode_double_array.c @@ -0,0 +1,427 @@ +/* + * Copyright 2005-2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * + * In applying this licence, ECMWF does not waive the privileges and immunities granted to it by + * virtue of its status as an intergovernmental organisation nor does it submit to any jurisdiction. + */ + + +#ifdef _GET_IBM_COUNTER +#include +#endif + + +static +void encode_double_array_common(int numBits, long packStart, long datasize, GRIBPACK *lGrib, + const double *data, double zref, double factor, long *gz) +{ + long i, z = *gz; + unsigned int ival; + int cbits, jbits; + unsigned int c; + static unsigned int mask[] = {0,1,3,7,15,31,63,127,255}; + + /* code from gribw routine flist2bitstream */ + + cbits = 8; + c = 0; + for ( i = packStart; i < datasize; i++ ) + { + /* note float -> unsigned int .. truncate */ + ival = (unsigned int) ((data[i] - zref) * factor + 0.5); + /* + if ( ival > max_nbpv_pow2 ) ival = max_nbpv_pow2; + if ( ival < 0 ) ival = 0; + */ + jbits = numBits; + while ( cbits <= jbits ) + { + if ( cbits == 8 ) + { + jbits -= 8; + lGrib[z++] = (ival >> jbits) & 0xFF; + } + else + { + jbits -= cbits; + lGrib[z++] = (c << cbits) + ((ival >> jbits) & mask[cbits]); + cbits = 8; + c = 0; + } + } + /* now jbits < cbits */ + if ( jbits ) + { + c = (c << jbits) + (ival & mask[jbits]); + cbits -= jbits; + } + } + if ( cbits != 8 ) lGrib[z++] = c << cbits; + + *gz = z; +} + +static +void encode_double_array_byte(int numBits, long packStart, long datasize, + GRIBPACK *restrict lGrib, + const double *restrict data, + double zref, double factor, long *restrict gz) +{ + long i, z = *gz; + unsigned long ival; + double tmp; + + data += packStart; + datasize -= packStart; + + if ( numBits == 8 ) + { +#ifdef _GET_IBM_COUNTER + hpmStart(2, "pack 8 bit base"); +#endif +#if defined (CRAY) +#pragma _CRI ivdep +#elif defined (SX) +#pragma vdir nodep +#elif defined (__uxp__) +#pragma loop novrec +#endif + for ( i = 0; i < datasize; i++ ) + { + tmp = ((data[i] - zref) * factor + 0.5); + ival = (unsigned long) tmp; + lGrib[z ] = ival; + z++; + } +#ifdef _GET_IBM_COUNTER + hpmStop(2); +#endif + } + else if ( numBits == 16 ) + { +#ifdef _GET_IBM_COUNTER + hpmStart(3, "pack 16 bit base"); +#endif +#if defined (CRAY) +#pragma _CRI ivdep +#elif defined (SX) +#pragma vdir nodep +#elif defined (__uxp__) +#pragma loop novrec +#endif + for ( i = 0; i < datasize; i++ ) + { + tmp = ((data[i] - zref) * factor + 0.5); + ival = (unsigned long) tmp; + lGrib[z ] = ival >> 8; + lGrib[z+1] = ival; + z += 2; + } +#ifdef _GET_IBM_COUNTER + hpmStop(3); +#endif + } + else if ( numBits == 24 ) + { +#ifdef _GET_IBM_COUNTER + hpmStart(4, "pack 24 bit base"); +#endif +#if defined (CRAY) +#pragma _CRI ivdep +#elif defined (SX) +#pragma vdir nodep +#elif defined (__uxp__) +#pragma loop novrec +#endif + for ( i = 0; i < datasize; i++ ) + { + tmp = ((data[i] - zref) * factor + 0.5); + ival = (unsigned long) tmp; + lGrib[z ] = ival >> 16; + lGrib[z+1] = ival >> 8; + lGrib[z+2] = ival; + z += 3; + } +#ifdef _GET_IBM_COUNTER + hpmStop(4); +#endif + } + else if ( numBits == 32 ) + { +#ifdef _GET_IBM_COUNTER + hpmStart(5, "pack 32 bit base"); +#endif +#if defined (CRAY) +#pragma _CRI ivdep +#elif defined (SX) +#pragma vdir nodep +#elif defined (__uxp__) +#pragma loop novrec +#endif + for ( i = 0; i < datasize; i++ ) + { + tmp = ((data[i] - zref) * factor + 0.5); + ival = (unsigned long) tmp; + lGrib[z ] = ival >> 24; + lGrib[z+1] = ival >> 16; + lGrib[z+2] = ival >> 8; + lGrib[z+3] = ival; + z += 4; + } +#ifdef _GET_IBM_COUNTER + hpmStop(5); +#endif + } + else if ( numBits > 0 && numBits <= 32 ) + { + encode_double_array_common(numBits, 0, datasize, lGrib, + data, zref, factor, &z); + } + else if ( numBits == 0 ) + { + } + else + { + Error("Unimplemented packing factor %d!", numBits); + } + + *gz = z; +} + +static +void encode_double_array_unrolled(int numBits, long packStart, long datasize, + GRIBPACK *restrict lGrib, + const double *restrict data, + double zref, double factor, long *restrict gz) +{ + U_BYTEORDER; + long i, j, z = *gz; + double tmp; +#ifdef _ARCH_PWR6 +#define __UNROLL_DEPTH_2 8 +#else +#define __UNROLL_DEPTH_2 8 +#endif + + data += packStart; + datasize -= packStart; + + { + long residual = datasize % __UNROLL_DEPTH_2; + long ofs = datasize - residual; + double dval[__UNROLL_DEPTH_2]; + unsigned long ival; + + /* reducing FP operations to single FMA is slowing down on pwr6 ... */ + + if ( numBits == 8 ) + { + unsigned char *cgrib = (unsigned char *) (lGrib + z); +#ifdef _GET_IBM_COUNTER + hpmStart(2, "pack 8 bit unrolled"); +#endif + for ( i = 0; i < datasize - residual; i += __UNROLL_DEPTH_2 ) + { + for (j = 0; j < __UNROLL_DEPTH_2; j++) + { + dval[j] = ((data[i+j] - zref) * factor + 0.5); + } + for (j = 0; j < __UNROLL_DEPTH_2; j++) + { + *cgrib = (unsigned long) dval[j]; + cgrib++; + z++; + } + } + for (j = 0; j < residual; j++) + { + dval[j] = ((data[ofs+j] - zref) * factor + 0.5); + } + for (j = 0; j < residual; j++) + { + *cgrib = (unsigned long) dval[j]; + cgrib++; + z++; + } +#ifdef _GET_IBM_COUNTER + hpmStop(2); +#endif + } + else if ( numBits == 16 ) + { + unsigned short *sgrib = (unsigned short *) (lGrib + z); +#ifdef _GET_IBM_COUNTER + hpmStart(3, "pack 16 bit unrolled"); +#endif + for ( i = 0; i < datasize - residual; i += __UNROLL_DEPTH_2 ) + { + for (j = 0; j < __UNROLL_DEPTH_2; j++) + { + dval[j] = ((data[i+j] - zref) * factor + 0.5); + } + if ( IS_BIGENDIAN() ) + { + for (j = 0; j < __UNROLL_DEPTH_2; j++) + { + *sgrib = (unsigned long) dval[j]; + sgrib++; + z += 2; + } + } + else + { + for (j = 0; j < __UNROLL_DEPTH_2; j++) + { + ival = (unsigned long) dval[j]; + lGrib[z ] = ival >> 8; + lGrib[z+1] = ival; + z += 2; + } + } + } + for (j = 0; j < residual; j++) + { + dval[j] = ((data[ofs+j] - zref) * factor + 0.5); + } + if ( IS_BIGENDIAN() ) + { + for (j = 0; j < residual; j++) + { + *sgrib = (unsigned long) dval[j]; + sgrib++; + z += 2; + } + } + else + { + for (j = 0; j < residual; j++) + { + ival = (unsigned long) dval[j]; + lGrib[z ] = ival >> 8; + lGrib[z+1] = ival; + z += 2; + } + } +#ifdef _GET_IBM_COUNTER + hpmStop(3); +#endif + } + else if ( numBits == 24 ) + { +#ifdef _GET_IBM_COUNTER + hpmStart(4, "pack 24 bit unrolled"); +#endif + for ( i = 0; i < datasize - residual; i += __UNROLL_DEPTH_2 ) + { + for (j = 0; j < __UNROLL_DEPTH_2; j++) + { + dval[j] = ((data[i+j] - zref) * factor + 0.5); + } + for (j = 0; j < __UNROLL_DEPTH_2; j++) + { + ival = (unsigned long) dval[j]; + lGrib[z ] = ival >> 16; + lGrib[z+1] = ival >> 8; + lGrib[z+2] = ival; + z += 3; + } + } + for (j = 0; j < residual; j++) + { + dval[j] = ((data[ofs+j] - zref) * factor + 0.5); + } + for (j = 0; j < residual; j++) + { + ival = (unsigned long) dval[j]; + lGrib[z ] = ival >> 16; + lGrib[z+1] = ival >> 8; + lGrib[z+2] = ival; + z += 3; + } +#ifdef _GET_IBM_COUNTER + hpmStop(4); +#endif + } + else if ( numBits == 32 ) + { +#ifdef _GET_IBM_COUNTER + hpmStart(5, "pack 32 bit unrolled"); +#endif + unsigned int *igrib = (unsigned int *) (lGrib + z); + for ( i = 0; i < datasize - residual; i += __UNROLL_DEPTH_2 ) + { + for (j = 0; j < __UNROLL_DEPTH_2; j++) + { + dval[j] = ((data[i+j] - zref) * factor + 0.5); + } + if ( IS_BIGENDIAN() ) + { + for (j = 0; j < __UNROLL_DEPTH_2; j++) + { + *igrib = (unsigned long) dval[j]; + igrib++; + z += 4; + } + } + else + { + for (j = 0; j < __UNROLL_DEPTH_2; j++) + { + ival = (unsigned long) dval[j]; + lGrib[z ] = ival >> 24; + lGrib[z+1] = ival >> 16; + lGrib[z+2] = ival >> 8; + lGrib[z+3] = ival; + z += 4; + } + } + } + for (j = 0; j < residual; j++) + { + dval[j] = ((data[ofs+j] - zref) * factor + 0.5); + } + if ( IS_BIGENDIAN() ) + { + for (j = 0; j < residual; j++) + { + *igrib = (unsigned long) dval[j]; + igrib++; + z += 4; + } + } + else + { + for (j = 0; j < residual; j++) + { + ival = (unsigned long) dval[j]; + lGrib[z ] = ival >> 24; + lGrib[z+1] = ival >> 16; + lGrib[z+2] = ival >> 8; + lGrib[z+3] = ival; + z += 4; + } + } +#ifdef _GET_IBM_COUNTER + hpmStop(5); +#endif + } + else if ( numBits > 0 && numBits <= 32 ) + { + encode_double_array_common(numBits, 0, datasize, lGrib, + data, zref, factor, &z); + } + else if ( numBits == 0 ) + { + } + else + { + Error("Unimplemented packing factor %d!", numBits); + } + } + + *gz = z; +#undef __UNROLL_DEPTH_2 +} diff --git a/src/grib_accessor_class_data_simple_packing.c b/src/grib_accessor_class_data_simple_packing.c index 06dd68e23..4047c41d1 100644 --- a/src/grib_accessor_class_data_simple_packing.c +++ b/src/grib_accessor_class_data_simple_packing.c @@ -156,27 +156,27 @@ static void init_class(grib_accessor_class* c) static void init(grib_accessor* a,const long v, grib_arguments* args) { - grib_accessor_data_simple_packing *self =(grib_accessor_data_simple_packing*)a; - self->units_factor = grib_arguments_get_name(a->parent->h,args,self->carg++); - self->units_bias = grib_arguments_get_name(a->parent->h,args,self->carg++); - self->changing_precision = grib_arguments_get_name(a->parent->h,args,self->carg++); - self->number_of_values = grib_arguments_get_name(a->parent->h,args,self->carg++); - self->bits_per_value = grib_arguments_get_name(a->parent->h,args,self->carg++); - self->reference_value = grib_arguments_get_name(a->parent->h,args,self->carg++); - self->binary_scale_factor = grib_arguments_get_name(a->parent->h,args,self->carg++); - self->decimal_scale_factor = grib_arguments_get_name(a->parent->h,args,self->carg++); - a->flags |= GRIB_ACCESSOR_FLAG_DATA; - self->dirty=1; + grib_accessor_data_simple_packing *self =(grib_accessor_data_simple_packing*)a; + self->units_factor = grib_arguments_get_name(a->parent->h,args,self->carg++); + self->units_bias = grib_arguments_get_name(a->parent->h,args,self->carg++); + self->changing_precision = grib_arguments_get_name(a->parent->h,args,self->carg++); + self->number_of_values = grib_arguments_get_name(a->parent->h,args,self->carg++); + self->bits_per_value = grib_arguments_get_name(a->parent->h,args,self->carg++); + self->reference_value = grib_arguments_get_name(a->parent->h,args,self->carg++); + self->binary_scale_factor = grib_arguments_get_name(a->parent->h,args,self->carg++); + self->decimal_scale_factor = grib_arguments_get_name(a->parent->h,args,self->carg++); + a->flags |= GRIB_ACCESSOR_FLAG_DATA; + self->dirty=1; } static unsigned long nbits[32]={ - 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, - 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, - 0x1000, 0x2000, 0x4000, 0x8000, 0x10000, 0x20000, - 0x40000, 0x80000, 0x100000, 0x200000, 0x400000, 0x800000, - 0x1000000, 0x2000000, 0x4000000, 0x8000000, 0x10000000, 0x20000000, - 0x40000000, 0x80000000 + 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, + 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, + 0x1000, 0x2000, 0x4000, 0x8000, 0x10000, 0x20000, + 0x40000, 0x80000, 0x100000, 0x200000, 0x400000, 0x800000, + 0x1000000, 0x2000000, 0x4000000, 0x8000000, 0x10000000, 0x20000000, + 0x40000000, 0x80000000 }; GRIB_INLINE static long number_of_bits(unsigned long x) { @@ -188,459 +188,466 @@ GRIB_INLINE static long number_of_bits(unsigned long x) { static long value_count(grib_accessor* a){ - grib_accessor_data_simple_packing* self = (grib_accessor_data_simple_packing*)a; + grib_accessor_data_simple_packing* self = (grib_accessor_data_simple_packing*)a; - long number_of_values; + long number_of_values; + + if(grib_get_long_internal(a->parent->h,self->number_of_values,&number_of_values) != GRIB_SUCCESS) + return 0; + + return number_of_values; - if(grib_get_long_internal(a->parent->h,self->number_of_values,&number_of_values) != GRIB_SUCCESS) - return 0; - - return number_of_values; - } static int unpack_double_element(grib_accessor* a, size_t idx, double* val) { - grib_accessor_data_simple_packing* self = (grib_accessor_data_simple_packing*)a; + grib_accessor_data_simple_packing* self = (grib_accessor_data_simple_packing*)a; - size_t n_vals; - int err = 0; + size_t n_vals; + int err = 0; - double reference_value; - long binary_scale_factor; - long bits_per_value; - long decimal_scale_factor; - unsigned char* buf = (unsigned char*)a->parent->h->buffer->data; - double s = 0; - double d = 0; - long pos = 0; - size_t o = 0; + double reference_value; + long binary_scale_factor; + long bits_per_value; + long decimal_scale_factor; + unsigned char* buf = (unsigned char*)a->parent->h->buffer->data; + double s = 0; + double d = 0; + long pos = 0; + size_t o = 0; - n_vals = grib_value_count(a); - if(n_vals==0){ - return GRIB_NOT_FOUND; - } - - if((err = grib_get_long_internal(a->parent->h,self->bits_per_value,&bits_per_value)) != - GRIB_SUCCESS) - return err; - - self->dirty=0; - - if((err = grib_get_double_internal(a->parent->h,self->reference_value, &reference_value)) != - GRIB_SUCCESS) - return err; - - if((err = grib_get_long_internal(a->parent->h,self->binary_scale_factor, &binary_scale_factor)) - != GRIB_SUCCESS) - return err; - - if((err = grib_get_long_internal(a->parent->h,self->decimal_scale_factor, &decimal_scale_factor)) - != GRIB_SUCCESS) - return err; - - /* Special case */ - - if (bits_per_value == 0) { - *val=reference_value; - return GRIB_SUCCESS; - } - - s = grib_power(binary_scale_factor,2); - d = grib_power(-decimal_scale_factor,10) ; - - grib_context_log(a->parent->h->context, GRIB_LOG_DEBUG, - "grib_accessor_data_simple_packing : unpack_double : creating %s, %d values", - a->name, n_vals); - - buf += grib_byte_offset(a); - - Assert(((bits_per_value*n_vals)/8) < (1<<29)); - /*ensure that the bit pointer is not overflown*/ - - if(bits_per_value%8) - { - grib_context_log(a->parent->h->context, GRIB_LOG_DEBUG, - "unpack_double : calling outline function : bpv %d, rv : %g, sf : %d, dsf : %d ", - bits_per_value,reference_value,binary_scale_factor, decimal_scale_factor); - pos=idx*bits_per_value; - *val= (double) ((( - grib_decode_unsigned_long(buf, &pos, bits_per_value)*s)+reference_value)*d); - /* val[i] = grib_decode_unsigned_long(buf, &pos, bits_per_value); */ - /* fprintf(stdout,"unpck uuu-o: %d vals %d bitspv buf %d by long \n", n_vals, bits_per_value, pos/8);*/ - } - else - { - int bc; - long lvalue = 0; - int l = bits_per_value/8; - - pos=idx*l; - buf+=pos; - lvalue = 0; - lvalue <<= 8; - lvalue |= buf[o++] ; - - for ( bc=1; bcparent->h,self->bits_per_value,&bits_per_value)) != + GRIB_SUCCESS) + return err; - return err; + self->dirty=0; + + if((err = grib_get_double_internal(a->parent->h,self->reference_value, &reference_value)) != + GRIB_SUCCESS) + return err; + + if((err = grib_get_long_internal(a->parent->h,self->binary_scale_factor, &binary_scale_factor)) + != GRIB_SUCCESS) + return err; + + if((err = grib_get_long_internal(a->parent->h,self->decimal_scale_factor, &decimal_scale_factor)) + != GRIB_SUCCESS) + return err; + + /* Special case */ + + if (bits_per_value == 0) { + *val=reference_value; + return GRIB_SUCCESS; + } + + s = grib_power(binary_scale_factor,2); + d = grib_power(-decimal_scale_factor,10) ; + + grib_context_log(a->parent->h->context, GRIB_LOG_DEBUG, + "grib_accessor_data_simple_packing : unpack_double : creating %s, %d values", + a->name, n_vals); + + buf += grib_byte_offset(a); + + Assert(((bits_per_value*n_vals)/8) < (1<<29)); + /*ensure that the bit pointer is not overflown*/ + + if(bits_per_value%8) + { + grib_context_log(a->parent->h->context, GRIB_LOG_DEBUG, + "unpack_double : calling outline function : bpv %d, rv : %g, sf : %d, dsf : %d ", + bits_per_value,reference_value,binary_scale_factor, decimal_scale_factor); + pos=idx*bits_per_value; + *val= (double) ((( + grib_decode_unsigned_long(buf, &pos, bits_per_value)*s)+reference_value)*d); + /* val[i] = grib_decode_unsigned_long(buf, &pos, bits_per_value); */ + /* fprintf(stdout,"unpck uuu-o: %d vals %d bitspv buf %d by long \n", n_vals, bits_per_value, pos/8);*/ + } + else + { + int bc; + long lvalue = 0; + int l = bits_per_value/8; + + pos=idx*l; + buf+=pos; + lvalue = 0; + lvalue <<= 8; + lvalue |= buf[o++] ; + + for ( bc=1; bcparent->h,self->bits_per_value,&bits_per_value)) != + GRIB_SUCCESS) + return err; + + /*/ + * check we don't decode bpv > max(ulong) as it is + * not currently supported by the algorithm + */ + if ( bits_per_value > (sizeof(long)*8) ) { + return GRIB_INVALID_BPV; + } + + if(self->units_factor && + (grib_get_double_internal(a->parent->h,self->units_factor,&units_factor)== GRIB_SUCCESS)) { + grib_set_double_internal(a->parent->h,self->units_factor,1.0); + } + + if(self->units_bias && + (grib_get_double_internal(a->parent->h,self->units_bias,&units_bias)== GRIB_SUCCESS)) { + grib_set_double_internal(a->parent->h,self->units_bias,0.0); + } + + if(n_vals==0){ + *len = 0; + return GRIB_SUCCESS; + } + + self->dirty=0; + + if((err = grib_get_double_internal(a->parent->h,self->reference_value, &reference_value)) != + GRIB_SUCCESS) + return err; + + if((err = grib_get_long_internal(a->parent->h,self->binary_scale_factor, &binary_scale_factor)) + != GRIB_SUCCESS) + return err; + + if((err = grib_get_long_internal(a->parent->h,self->decimal_scale_factor, &decimal_scale_factor)) + != GRIB_SUCCESS) + return err; + + /* Special case */ + + if(bits_per_value == 0) + { + for(i = 0; i < n_vals; i++) + val[i] = reference_value; + *len = n_vals; + return GRIB_SUCCESS; + } + + s = grib_power(binary_scale_factor,2); + d = grib_power(-decimal_scale_factor,10) ; + + grib_context_log(a->parent->h->context, GRIB_LOG_DEBUG, + "grib_accessor_data_simple_packing : unpack_double : creating %s, %d values", + a->name, n_vals); + + buf += grib_byte_offset(a); + + Assert(((bits_per_value*n_vals)/8) < (1<<29)); + /*ensure that the bit pointer is not overflown*/ + + grib_context_log(a->parent->h->context, GRIB_LOG_DEBUG, + "unpack_double : calling outline function : bpv %d, rv : %g, sf : %d, dsf : %d ", + bits_per_value,reference_value,binary_scale_factor, decimal_scale_factor); + grib_decode_double_array(buf,&pos,bits_per_value,reference_value,s,d,n_vals,val); + *len = (long) n_vals; - return GRIB_ARRAY_TOO_SMALL; - } - if((err = grib_get_long_internal(a->parent->h,self->bits_per_value,&bits_per_value)) != - GRIB_SUCCESS) + if (units_factor != 1.0) { + if (units_bias != 0.0) + for (i=0;i max(ulong) as it is - * not currently supported by the algorithm - */ - if ( bits_per_value > (sizeof(long)*8) ) { - return GRIB_INVALID_BPV; - } - - if(self->units_factor && - (grib_get_double_internal(a->parent->h,self->units_factor,&units_factor)== GRIB_SUCCESS)) { - grib_set_double_internal(a->parent->h,self->units_factor,1.0); - } - - if(self->units_bias && - (grib_get_double_internal(a->parent->h,self->units_bias,&units_bias)== GRIB_SUCCESS)) { - grib_set_double_internal(a->parent->h,self->units_bias,0.0); - } - - if(n_vals==0){ - *len = 0; - return GRIB_SUCCESS; - } - - self->dirty=0; - - if((err = grib_get_double_internal(a->parent->h,self->reference_value, &reference_value)) != - GRIB_SUCCESS) - return err; - - if((err = grib_get_long_internal(a->parent->h,self->binary_scale_factor, &binary_scale_factor)) - != GRIB_SUCCESS) - return err; - - if((err = grib_get_long_internal(a->parent->h,self->decimal_scale_factor, &decimal_scale_factor)) - != GRIB_SUCCESS) - return err; - - /* Special case */ - - if(bits_per_value == 0) - { - for(i = 0; i < n_vals; i++) - val[i] = reference_value; - *len = n_vals; - return GRIB_SUCCESS; - } - - s = grib_power(binary_scale_factor,2); - d = grib_power(-decimal_scale_factor,10) ; - - grib_context_log(a->parent->h->context, GRIB_LOG_DEBUG, - "grib_accessor_data_simple_packing : unpack_double : creating %s, %d values", - a->name, n_vals); - - buf += grib_byte_offset(a); - - Assert(((bits_per_value*n_vals)/8) < (1<<29)); - /*ensure that the bit pointer is not overflown*/ - - grib_context_log(a->parent->h->context, GRIB_LOG_DEBUG, - "unpack_double : calling outline function : bpv %d, rv : %g, sf : %d, dsf : %d ", - bits_per_value,reference_value,binary_scale_factor, decimal_scale_factor); - grib_decode_double_array(buf,&pos,bits_per_value,reference_value,s,d,n_vals,val); - - *len = (long) n_vals; - - if (units_factor != 1.0) { - if (units_bias != 0.0) - for (i=0;iparent->h->buffer->data; - size_t nvals = len; - size_t *plen=&len; - long bits_per_value=0; - long pos; - int err; + grib_accessor_data_simple_packing* self = (grib_accessor_data_simple_packing*)a; + unsigned char* buf = (unsigned char*)a->parent->h->buffer->data; + size_t nvals = len; + size_t *plen=&len; + long bits_per_value=0; + long pos; + int err; - if((err = grib_get_long_internal(a->parent->h,self->bits_per_value,&bits_per_value)) != - GRIB_SUCCESS) - return err; + if((err = grib_get_long_internal(a->parent->h,self->bits_per_value,&bits_per_value)) != + GRIB_SUCCESS) + return err; - buf+=(start*bits_per_value)/8; - pos=start*bits_per_value%8; - return _unpack_double(a,val,plen,buf,pos,nvals); + buf+=(start*bits_per_value)/8; + pos=start*bits_per_value%8; + return _unpack_double(a,val,plen,buf,pos,nvals); } static int unpack_double(grib_accessor* a, double* val, size_t *len) { - unsigned char* buf = (unsigned char*)a->parent->h->buffer->data; - size_t nvals = grib_value_count(a); - long pos=0; - return _unpack_double(a,val,len,buf,pos,nvals); + unsigned char* buf = (unsigned char*)a->parent->h->buffer->data; + size_t nvals = grib_value_count(a); + long pos=0; + return _unpack_double(a,val,len,buf,pos,nvals); } +#if GRIB_IBMPOWER67_OPT +#define restrict +#include "minmax_val.c" +#undef restrict +#endif static int pack_double(grib_accessor* a, const double* val, size_t *len) { - grib_accessor_data_simple_packing* self = (grib_accessor_data_simple_packing*)a; + grib_accessor_data_simple_packing* self = (grib_accessor_data_simple_packing*)a; - size_t i = 0; - size_t n_vals = *len; - int err = 0; - int last; - double reference_value = 0; - long binary_scale_factor = 0; - long bits_per_value = 0; - long decimal_scale_factor = 0; - long decimal_scale_factor_get = 0; - double decimal = 1; - double max = 0; - double min = 0; - double unscaled_max = 0; - double unscaled_min = 0; - double f=0; - double range=0; - double minrange=0,maxrange=0; - long changing_precision=0; - grib_context* c=a->parent->h->context; + size_t i = 0; + size_t n_vals = *len; + int err = 0; + int last; + double reference_value = 0; + long binary_scale_factor = 0; + long bits_per_value = 0; + long decimal_scale_factor = 0; + long decimal_scale_factor_get = 0; + double decimal = 1; + double max = 0; + double min = 0; + double unscaled_max = 0; + double unscaled_min = 0; + double f=0; + double range=0; + double minrange=0,maxrange=0; + long changing_precision=0; + grib_context* c=a->parent->h->context; - decimal_scale_factor=0; + decimal_scale_factor=0; - if (*len == 0) { - return GRIB_NO_VALUES; - } + if (*len == 0) { + return GRIB_NO_VALUES; + } - if((err = grib_get_long_internal(a->parent->h,self->bits_per_value,&bits_per_value)) != - GRIB_SUCCESS) - return err; + if((err = grib_get_long_internal(a->parent->h,self->bits_per_value,&bits_per_value)) != + GRIB_SUCCESS) + return err; - if(*len == 0) return GRIB_SUCCESS; + if(*len == 0) return GRIB_SUCCESS; - if((err = grib_get_long_internal(a->parent->h,self->decimal_scale_factor, &decimal_scale_factor_get)) - != GRIB_SUCCESS) - return err; - /*/ - * check we don't encode bpv > max(ulong)-1 as it is - * not currently supported by the algorithm - */ - if ( bits_per_value > (sizeof(long)*8-1) ) { - return GRIB_INVALID_BPV; - } + if((err = grib_get_long_internal(a->parent->h,self->decimal_scale_factor, &decimal_scale_factor_get)) + != GRIB_SUCCESS) + return err; + /*/ + * check we don't encode bpv > max(ulong)-1 as it is + * not currently supported by the algorithm + */ + if ( bits_per_value > (sizeof(long)*8-1) ) { + return GRIB_INVALID_BPV; + } - self->dirty=1; + self->dirty=1; - max = val[0]; - min = max; - for(i=1;i< n_vals;i++) { - if (val[i] > max ) max = val[i]; - if (val[i] < min ) min = val[i]; - } + max = val[0]; + min = max; +#if GRIB_IBMPOWER67_OPT + minmax_val(val+1, n_vals-1, &min, &max); +#else + for(i=1;i< n_vals;i++) { + if (val[i] > max ) max = val[i]; + if (val[i] < min ) min = val[i]; + } +#endif - /* constant field only reference_value is set and bits_per_value=0 */ - if(max==min) { - if (grib_get_nearest_smaller_value(a->parent->h,self->reference_value,val[0],&reference_value) - !=GRIB_SUCCESS) { - grib_context_log(a->parent->h->context,GRIB_LOG_ERROR, - "unable to find nearest_smaller_value of %g for %s",min,self->reference_value); - exit(GRIB_INTERNAL_ERROR); - } - if((err = grib_set_double_internal(a->parent->h,self->reference_value, reference_value)) != - GRIB_SUCCESS) - return err; + /* constant field only reference_value is set and bits_per_value=0 */ + if(max==min) { + if (grib_get_nearest_smaller_value(a->parent->h,self->reference_value,val[0],&reference_value) + !=GRIB_SUCCESS) { + grib_context_log(a->parent->h->context,GRIB_LOG_ERROR, + "unable to find nearest_smaller_value of %g for %s",min,self->reference_value); + exit(GRIB_INTERNAL_ERROR); + } + if((err = grib_set_double_internal(a->parent->h,self->reference_value, reference_value)) != + GRIB_SUCCESS) + return err; - { - /* Make sure we can decode it again */ - double ref = 1e-100; - grib_get_double_internal(a->parent->h,self->reference_value,&ref); - if (ref != reference_value) - printf("%.20e != %.20e",ref,reference_value); - Assert(ref == reference_value); - } + { + /* Make sure we can decode it again */ + double ref = 1e-100; + grib_get_double_internal(a->parent->h,self->reference_value,&ref); + if (ref != reference_value) + printf("%.20e != %.20e",ref,reference_value); + Assert(ref == reference_value); + } - if (c->large_constant_fields || (c->gribex_mode_on==1 && self->edition==1)) { - if((err = grib_set_long_internal(a->parent->h,self->binary_scale_factor, 0)) != - GRIB_SUCCESS) - return err; + if (c->large_constant_fields || (c->gribex_mode_on==1 && self->edition==1)) { + if((err = grib_set_long_internal(a->parent->h,self->binary_scale_factor, 0)) != + GRIB_SUCCESS) + return err; - if((err = grib_set_long_internal(a->parent->h,self->decimal_scale_factor, 0)) != - GRIB_SUCCESS) - return err; + if((err = grib_set_long_internal(a->parent->h,self->decimal_scale_factor, 0)) != + GRIB_SUCCESS) + return err; - if (bits_per_value==0) { - if((err = grib_set_long_internal(a->parent->h,self->bits_per_value, 16)) != - GRIB_SUCCESS) - return err; - } + if (bits_per_value==0) { + if((err = grib_set_long_internal(a->parent->h,self->bits_per_value, 16)) != + GRIB_SUCCESS) + return err; + } - return GRIB_SUCCESS; - } else { - bits_per_value=0; - if((err = grib_set_long_internal(a->parent->h,self->bits_per_value, bits_per_value)) != - GRIB_SUCCESS) - return err; + return GRIB_SUCCESS; + } else { + bits_per_value=0; + if((err = grib_set_long_internal(a->parent->h,self->bits_per_value, bits_per_value)) != + GRIB_SUCCESS) + return err; - return GRIB_CONSTANT_FIELD; - } - } + return GRIB_CONSTANT_FIELD; + } + } - if((err = grib_get_long_internal(a->parent->h,self->binary_scale_factor, &binary_scale_factor)) - != GRIB_SUCCESS) - return err; + if((err = grib_get_long_internal(a->parent->h,self->binary_scale_factor, &binary_scale_factor)) + != GRIB_SUCCESS) + return err; - if((err = grib_get_long_internal(a->parent->h,self->changing_precision, &changing_precision)) - != GRIB_SUCCESS) - return err; + if((err = grib_get_long_internal(a->parent->h,self->changing_precision, &changing_precision)) + != GRIB_SUCCESS) + return err; - /* the packing parameters are not properly defined + /* the packing parameters are not properly defined this is a safe way of fixing the problem */ - if ( changing_precision==0 && bits_per_value==0 && decimal_scale_factor_get==0) { + if ( changing_precision==0 && bits_per_value==0 && decimal_scale_factor_get==0) { - grib_context_log(a->parent->h->context,GRIB_LOG_WARNING, - "%s==0 and %s==0 (setting %s=24)", - self->bits_per_value, - self->decimal_scale_factor, - self->bits_per_value); - - bits_per_value=24; - if((err = grib_set_long_internal(a->parent->h,self->bits_per_value, - bits_per_value))!= GRIB_SUCCESS) - return err; - } + grib_context_log(a->parent->h->context,GRIB_LOG_WARNING, + "%s==0 and %s==0 (setting %s=24)", + self->bits_per_value, + self->decimal_scale_factor, + self->bits_per_value); - if ( bits_per_value == 0 || (binary_scale_factor==0 && decimal_scale_factor_get!=0) ) { - /* decimal_scale_factor is given, binary_scale_factor=0 + bits_per_value=24; + if((err = grib_set_long_internal(a->parent->h,self->bits_per_value, + bits_per_value))!= GRIB_SUCCESS) + return err; + } + + if ( bits_per_value == 0 || (binary_scale_factor==0 && decimal_scale_factor_get!=0) ) { + /* decimal_scale_factor is given, binary_scale_factor=0 and bits_per_value is computed */ - binary_scale_factor=0; - decimal_scale_factor=decimal_scale_factor_get; - decimal = grib_power(decimal_scale_factor,10) ; - min*=decimal; - max*=decimal; + binary_scale_factor=0; + decimal_scale_factor=decimal_scale_factor_get; + decimal = grib_power(decimal_scale_factor,10) ; + min*=decimal; + max*=decimal; - /* bits_per_value=(long)ceil(log((double)(imax-imin+1))/log(2.0)); */ - bits_per_value=number_of_bits((unsigned long)fabs(max-min)); - /*printf("bits_per_value=%ld\n",bits_per_value);*/ - if((err = grib_set_long_internal(a->parent->h,self->bits_per_value, bits_per_value)) != - GRIB_SUCCESS) - return err; - if (grib_get_nearest_smaller_value(a->parent->h,self->reference_value,min,&reference_value) - !=GRIB_SUCCESS) { - grib_context_log(a->parent->h->context,GRIB_LOG_ERROR, - "unable to find nearest_smaller_value of %g for %s",min,self->reference_value); - exit(GRIB_INTERNAL_ERROR); - } - /* divisor=1; */ - } else { - last=127; - if (c->gribex_mode_on && self->edition==1) last=99; - /* bits_per_value is given and decimal_scale_factor - and binary_scale_factor are calcualated - */ - if (max == min) { - binary_scale_factor=0; - /* divisor=1; */ - if (grib_get_nearest_smaller_value(a->parent->h,self->reference_value,min,&reference_value) - !=GRIB_SUCCESS) { - grib_context_log(a->parent->h->context,GRIB_LOG_ERROR, - "unable to find nearest_smaller_value of %g for %s",min,self->reference_value); - exit(GRIB_INTERNAL_ERROR); - } + /* bits_per_value=(long)ceil(log((double)(imax-imin+1))/log(2.0)); */ + bits_per_value=number_of_bits((unsigned long)fabs(max-min)); + /*printf("bits_per_value=%ld\n",bits_per_value);*/ + if((err = grib_set_long_internal(a->parent->h,self->bits_per_value, bits_per_value)) != + GRIB_SUCCESS) + return err; + if (grib_get_nearest_smaller_value(a->parent->h,self->reference_value,min,&reference_value) + !=GRIB_SUCCESS) { + grib_context_log(a->parent->h->context,GRIB_LOG_ERROR, + "unable to find nearest_smaller_value of %g for %s",min,self->reference_value); + exit(GRIB_INTERNAL_ERROR); + } + /* divisor=1; */ } else { - /* printf("max=%g reference_value=%g grib_power(-last,2)=%g decimal_scale_factor=%ld bits_per_value=%ld\n", + last=127; + if (c->gribex_mode_on && self->edition==1) last=99; + /* bits_per_value is given and decimal_scale_factor + and binary_scale_factor are calcualated + */ + if (max == min) { + binary_scale_factor=0; + /* divisor=1; */ + if (grib_get_nearest_smaller_value(a->parent->h,self->reference_value,min,&reference_value) + !=GRIB_SUCCESS) { + grib_context_log(a->parent->h->context,GRIB_LOG_ERROR, + "unable to find nearest_smaller_value of %g for %s",min,self->reference_value); + exit(GRIB_INTERNAL_ERROR); + } + } else { + /* printf("max=%g reference_value=%g grib_power(-last,2)=%g decimal_scale_factor=%ld bits_per_value=%ld\n", max,reference_value,grib_power(-last,2),decimal_scale_factor,bits_per_value);*/ - /* last must be a parameter coming from the def file*/ - range=(max-min); - unscaled_min=min; - unscaled_max=max; - f=(grib_power(bits_per_value,2)-1); - minrange=grib_power(-last,2)*f; - maxrange=grib_power(last,2)*f; + /* last must be a parameter coming from the def file*/ + range=(max-min); + unscaled_min=min; + unscaled_max=max; + f=(grib_power(bits_per_value,2)-1); + minrange=grib_power(-last,2)*f; + maxrange=grib_power(last,2)*f; - while (rangemaxrange) { - decimal_scale_factor-=1; - decimal/=10; - min=unscaled_min*decimal; - max=unscaled_max*decimal; - range=(max-min); - } + while (rangemaxrange) { + decimal_scale_factor-=1; + decimal/=10; + min=unscaled_min*decimal; + max=unscaled_max*decimal; + range=(max-min); + } - if (grib_get_nearest_smaller_value(a->parent->h,self->reference_value, - min,&reference_value)!=GRIB_SUCCESS) { - grib_context_log(a->parent->h->context,GRIB_LOG_ERROR, - "unable to find nearest_smaller_value of %g for %s",min,self->reference_value); - exit(GRIB_INTERNAL_ERROR); - } + if (grib_get_nearest_smaller_value(a->parent->h,self->reference_value, + min,&reference_value)!=GRIB_SUCCESS) { + grib_context_log(a->parent->h->context,GRIB_LOG_ERROR, + "unable to find nearest_smaller_value of %g for %s",min,self->reference_value); + exit(GRIB_INTERNAL_ERROR); + } - binary_scale_factor = grib_get_binary_scale_fact(max,reference_value,bits_per_value,&err); + binary_scale_factor = grib_get_binary_scale_fact(max,reference_value,bits_per_value,&err); + } } - } - if((err = grib_set_double_internal(a->parent->h,self->reference_value, reference_value)) != - GRIB_SUCCESS) - return err; + if((err = grib_set_double_internal(a->parent->h,self->reference_value, reference_value)) != + GRIB_SUCCESS) + return err; - if((err = grib_set_long_internal(a->parent->h,self->changing_precision, 0)) != - GRIB_SUCCESS) - return err; - if((err = grib_set_long_internal(a->parent->h,self->binary_scale_factor, binary_scale_factor)) != - GRIB_SUCCESS) - return err; - if((err = grib_set_long_internal(a->parent->h,self->decimal_scale_factor, decimal_scale_factor)) - != GRIB_SUCCESS) - return err; + if((err = grib_set_long_internal(a->parent->h,self->changing_precision, 0)) != + GRIB_SUCCESS) + return err; + if((err = grib_set_long_internal(a->parent->h,self->binary_scale_factor, binary_scale_factor)) != + GRIB_SUCCESS) + return err; + if((err = grib_set_long_internal(a->parent->h,self->decimal_scale_factor, decimal_scale_factor)) + != GRIB_SUCCESS) + return err; - return GRIB_SUCCESS; - + return GRIB_SUCCESS; } - diff --git a/src/grib_bits.c b/src/grib_bits.c index eb925c93c..6b0585154 100644 --- a/src/grib_bits.c +++ b/src/grib_bits.c @@ -36,129 +36,133 @@ static int max_nbits = sizeof(unsigned long)*8; unsigned long grib_decode_unsigned_byte_long(const unsigned char* p, long o, int l) { - long accum = 0; - int i = 0; - unsigned char b = p[o++]; + long accum = 0; + int i = 0; + unsigned char b = p[o++]; - Assert(l <= max_nbits); + Assert(l <= max_nbits); - accum <<= 8; - accum |= b ; - - for ( i=1; i> (l*8-(8*(i+1))); - p[o++] = accum ; - } + for ( i=0; i> (l*8-(8*(i+1))); + p[o++] = accum ; + } - if (sign) p[off] |= 128; + if (sign) p[off] |= 128; - return GRIB_SUCCESS; + return GRIB_SUCCESS; } static void grib_set_bit_on( unsigned char* p, long* bitp){ - p += *bitp/8; - *p |= (1u << (7-((*bitp)%8))); - (*bitp)++; + p += *bitp/8; + *p |= (1u << (7-((*bitp)%8))); + (*bitp)++; } static void grib_set_bit_off( unsigned char* p, long* bitp){ - p += *bitp/8; - *p &= ~(1u << (7-((*bitp)%8))); - (*bitp)++; + p += *bitp/8; + *p &= ~(1u << (7-((*bitp)%8))); + (*bitp)++; } int grib_get_bit(const unsigned char* p, long bitp){ - p += (bitp >> 3); - return (*p&(1<<(7-(bitp%8)))); + p += (bitp >> 3); + return (*p&(1<<(7-(bitp%8)))); } void grib_set_bit( unsigned char* p, long bitp, int val){ - if(val == 0) grib_set_bit_off(p,&bitp); - else grib_set_bit_on(p,&bitp); + if(val == 0) grib_set_bit_off(p,&bitp); + else grib_set_bit_on(p,&bitp); } long grib_decode_signed_longb(const unsigned char* p, long *bitp, long nbits) { - int sign = grib_get_bit(p, *bitp); - long val = 0; + int sign = grib_get_bit(p, *bitp); + long val = 0; - Assert(nbits <= max_nbits); + Assert(nbits <= max_nbits); - *bitp += 1; + *bitp += 1; - val = grib_decode_unsigned_long( p, bitp, nbits-1); + val = grib_decode_unsigned_long( p, bitp, nbits-1); - if (sign)val = -val; + if (sign)val = -val; - - return val; + return val; } int grib_encode_signed_longb(unsigned char* p, long val ,long *bitp, long nb) { - short sign = val < 0; + short sign = val < 0; - Assert(nb <= max_nbits); + Assert(nb <= max_nbits); - if(sign) val = -val; + if(sign) val = -val; - if(sign) - grib_set_bit_on (p, bitp); - else - grib_set_bit_off(p, bitp); + if(sign) + grib_set_bit_on (p, bitp); + else + grib_set_bit_off(p, bitp); - return grib_encode_unsigned_longb(p, val ,bitp, nb-1); + return grib_encode_unsigned_longb(p, val ,bitp, nb-1); } +#if GRIB_IBMPOWER67_OPT + +#include "grib_bits_ibmpow.c" + +#else #if FAST_BIG_ENDIAN #include "grib_bits_fast_big_endian.c" @@ -169,3 +173,4 @@ int grib_encode_signed_longb(unsigned char* p, long val ,long *bitp, long nb) #endif +#endif diff --git a/src/grib_bits_ibmpow.c b/src/grib_bits_ibmpow.c new file mode 100644 index 000000000..ad2feffdc --- /dev/null +++ b/src/grib_bits_ibmpow.c @@ -0,0 +1,148 @@ +/* + * Copyright 2005-2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * + * In applying this licence, ECMWF does not waive the privileges and immunities granted to it by + * virtue of its status as an intergovernmental organisation nor does it submit to any jurisdiction. + */ + + +unsigned long grib_decode_unsigned_long(const unsigned char* p, long *bitp, long nbits) +{ + int i; + long ret = 0; + long o = *bitp/8; + int l = nbits/8; + + if (nbits==0) return 0; + + if(nbits > max_nbits) + { + int bits = nbits; + int mod = bits % max_nbits; + + if(mod != 0) + { + int e=grib_decode_unsigned_long(p,bitp,mod); + Assert( e == 0); + bits -= mod; + } + + while(bits > max_nbits) + { + int e=grib_decode_unsigned_long(p,bitp,max_nbits); + Assert( e == 0); + bits -= max_nbits; + } + + return grib_decode_unsigned_long(p,bitp,bits); + } + + if((nbits%8 > 0)||(*bitp%8 > 0)) { + for(i=0; i< nbits;i++){ + ret <<= 1; + if(grib_get_bit( p, *bitp)) ret += 1; + *bitp += 1; + } + return ret; + } + + ret <<= 8; + ret |= p[o++] ; + + for ( i=1; i max_nbits) + { + /* TODO: Do some real code here, to support long longs */ + int bits = nbits; + int mod = bits % max_nbits; + long zero = 0; + + /* printf("Warning: encoding %ld bits=%ld %ld\n",val,nbits,*bitp); */ + + if(mod != 0) + { + int e=grib_encode_unsigned_long(p,zero,bitp,mod); + /* printf(" -> : encoding %ld bits=%ld %ld\n",zero,(long)mod,*bitp); */ + Assert( e == 0); + bits -= mod; + } + + while(bits > max_nbits) + { + int e=grib_encode_unsigned_long(p,zero,bitp,max_nbits); + /* printf(" -> : encoding %ld bits=%ld %ld\n",zero,(long)max_nbits,*bitp); */ + Assert(e == 0); + bits -= max_nbits; + } + + /* printf(" -> : encoding %ld bits=%ld %ld\n",val,(long)bits,*bitp); */ + return grib_encode_unsigned_long(p,val,bitp,bits); + } + + if(s) + p += (*bitp >> 3); /* skip the bytes */ + else + p += (*bitp >> 3); /* skip the bytes */ + + /* head */ + if(s) { + len -= n; + if (len < 0) { + tmp = ((val << -len) | ((*p) & dmasks[n])); + } else { + tmp = ((val >> len) | ((*p) & dmasks[n])); + } + *p = tmp; + (*p)++; + } + + /* write the middle words */ + while(len >= 8) + { + len -= 8; + *p++ = (val >> len); + } + + /* write the end bits */ + if(len) *p = (val << (8-len)); + + *bitp += nbits; + return GRIB_SUCCESS; +} + +int grib_encode_unsigned_longb(unsigned char* p, unsigned long val ,long *bitp, long nb) +{ + long i = 0; + + Assert(nb <= max_nbits); + + for(i=nb-1; i >= 0; i--){ + if(test(val,i)) + grib_set_bit_on (p, bitp); + else + grib_set_bit_off(p, bitp); + } + return GRIB_SUCCESS; +} + +#include "grib_bits_ibmpow_opt.c" + diff --git a/src/grib_bits_ibmpow_opt.c b/src/grib_bits_ibmpow_opt.c new file mode 100644 index 000000000..c003dc213 --- /dev/null +++ b/src/grib_bits_ibmpow_opt.c @@ -0,0 +1,298 @@ +/* + * Copyright 2005-2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * + * In applying this licence, ECMWF does not waive the privileges and immunities granted to it by + * virtue of its status as an intergovernmental organisation nor does it submit to any jurisdiction. + */ + +int grib_decode_long_array(const unsigned char* p, long *bitp, long bitsPerValue, + size_t n_vals,long* val) { + long i=0; + unsigned long lvalue = 0; + + if(bitsPerValue%8) + { + int j=0; + for(i=0;i < n_vals;i++) { + lvalue=0; + for(j=0; j< bitsPerValue;j++){ + lvalue <<= 1; + if(grib_get_bit( p, *bitp)) lvalue += 1; + *bitp += 1; + } + val[i] = lvalue; + } + } else { + int bc; + int l = bitsPerValue/8; + size_t o = *bitp/8; + + for(i=0;i < n_vals;i++) + { + lvalue = 0; + lvalue <<= 8; + lvalue |= p[o++] ; + + for ( bc=1; bc= max_nbits) { + pp+=startBit/max_nbits; + startBit %= max_nbits; + } + + if ( (max_nbits%nbits == 0) && (*bitp%nbits == 0) ) { + for (i=0;i max_nbits) { + countOfLeftmostBits = max_nbits - startBit; + remainingBits -= countOfLeftmostBits; + leftmostBits=(VALUE(*pp,startBit,countOfLeftmostBits)) << remainingBits; + startBit = 0; + pp++; + } else + leftmostBits = 0; + + val[i]=leftmostBits+(VALUE(*pp,startBit,remainingBits)); + val[i]= ((( (val[i]) * s)+reference_value)*d[i/2]); + startBit+=remainingBits; + remainingBits=nbits; + } + } + + *bitp+=size*nbits; + + return GRIB_SUCCESS; +} + +int grib_encode_long_array(size_t n_vals,const long* val,long bits_per_value,unsigned char* p,long *off) +{ + size_t i=0; + unsigned long unsigned_val=0; + unsigned char *encoded=p; + if(bits_per_value%8){ + for(i=0;i< n_vals;i++){ + unsigned_val=val[i]; + grib_encode_unsigned_longb(encoded, unsigned_val, off , bits_per_value); + } + } else{ + for(i=0;i< n_vals;i++){ + int blen=0; + blen = bits_per_value; + unsigned_val = val[i]; + while(blen >= 8) + { + blen -= 8; + *encoded = (unsigned_val >> blen); + encoded++; + *off+=8; + } + } + } + return GRIB_SUCCESS; +} + +#define U_BYTEORDER static union {unsigned long l; unsigned char c[sizeof(long)];} u_byteorder = {1} +#define IS_BIGENDIAN() (u_byteorder.c[sizeof(long) - 1]) +#define GRIBPACK unsigned char +#define Error(x, y) fprintf(stderr, x, y) +#define restrict + +#include "encode_double_array.c" + +#undef U_BYTEORDER +#undef IS_BIGENDIAN +#undef GRIBPACK +#undef Error +#undef restrict + +int grib_encode_double_array(size_t n_vals,const double* val,long bits_per_value,double reference_value,double d,double divisor,unsigned char* p,long *off) +{ + if ( fabs(d - 1) > 0 ) /* d != 1 */ + { + size_t i=0; + unsigned long unsigned_val=0; + unsigned char *encoded=p; + double x; + + if(bits_per_value%8){ + for(i=0;i< n_vals;i++){ + x=(((val[i]*d)-reference_value)*divisor)+0.5; + unsigned_val = (unsigned long)x; + grib_encode_unsigned_longb(encoded, unsigned_val, off , bits_per_value); + } + } else{ + for(i=0;i< n_vals;i++){ + int blen=0; + blen = bits_per_value; + x = ((((val[i]*d)-reference_value)*divisor)+0.5); + unsigned_val = (unsigned long)x; + while(blen >= 8) + { + blen -= 8; + *encoded = (unsigned_val >> blen); + encoded++; + *off+=8; + } + } + } + } + else + { +#if defined (_ARCH_PWR6) + encode_double_array_unrolled(bits_per_value, 0, n_vals, p, val, reference_value, divisor, off); +#else + encode_double_array_byte (bits_per_value, 0, n_vals, p, val, reference_value, divisor, off); +#endif + } + + return GRIB_SUCCESS; +} + +/* code from grib_bits_fast_big_endian_simple.c */ +int grib_encode_double_array_complex(size_t n_vals,double* val,long nbits,double reference_value, + double* scal,double d,double divisor,unsigned char* p,long *bitp) { + long* destination = (long*)p; + double* v=val; + long countOfLeftmostBits=0,startBit=0,remainingBits=0,rightmostBits=0; + unsigned long uval=0; + size_t i=0; + + startBit=*bitp; + remainingBits = nbits; + + if (startBit >= max_nbits) { + destination += startBit / max_nbits; + startBit %= max_nbits; + } + + if ( (max_nbits%nbits == 0) && (*bitp%nbits == 0) ) { + + for(i=0;i< n_vals;i++) { + uval = (unsigned long)(((((*v)*d*scal[i/2])-reference_value)*divisor)+0.5); + if (startBit == max_nbits) { + startBit = 0; + destination++; + } + + rightmostBits = VALUE(uval,max_nbits-remainingBits,remainingBits); + *destination = ((*destination) & ~MASKVALUE(startBit,remainingBits)) + + (rightmostBits << max_nbits-(remainingBits+startBit)); + startBit+=remainingBits; + remainingBits=nbits; + + v++; + } + + } else { + + for(i=0;i< n_vals;i++) { + countOfLeftmostBits = startBit + remainingBits; + uval = (unsigned long)(((((*v)*d*scal[i/2])-reference_value)*divisor)+0.5); + if (countOfLeftmostBits > max_nbits) { + countOfLeftmostBits = max_nbits - startBit; + startBit = max_nbits - remainingBits; + remainingBits -= countOfLeftmostBits; + *destination = (((*destination) >> countOfLeftmostBits) << countOfLeftmostBits) + + (VALUE(uval,startBit,countOfLeftmostBits)); + startBit = 0; + destination++; + } + + rightmostBits = VALUE(uval,max_nbits-remainingBits,remainingBits); + *destination = ((*destination) & ~MASKVALUE(startBit,remainingBits)) + + (rightmostBits << max_nbits-(remainingBits+startBit)); + startBit+=remainingBits; + remainingBits=nbits; + + v++; + } + } + + *bitp+=n_vals*nbits; + + return 0; +} diff --git a/src/minmax_val.c b/src/minmax_val.c new file mode 100644 index 000000000..fb6dbec2d --- /dev/null +++ b/src/minmax_val.c @@ -0,0 +1,332 @@ +/* + * Copyright 2005-2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * + * In applying this licence, ECMWF does not waive the privileges and immunities granted to it by + * virtue of its status as an intergovernmental organisation nor does it submit to any jurisdiction. + */ + + +#ifdef __PGI +#undef __AVX__ +#undef __SSE2__ +#endif + + +#ifdef _GET_IBM_COUNTER +#include +#endif + +#ifdef __AVX__ +#include +#include +#include +#include +#ifdef _GET_X86_COUNTER +#include +#endif +#else +#ifdef __SSE2__ +#include +#include +#include +#include +#ifdef _GET_X86_COUNTER +#include +#endif +#endif +#endif + + +#ifdef __AVX__ +static +void avx_minmax_val(const double *restrict buf, long nframes, double *min, double *max) +{ + __m256d current_max, current_min, work; + + /* load max and min values into all four slots of the XMM registers */ + current_min = _mm256_set1_pd(*min); + current_max = _mm256_set1_pd(*max); + + /* Work input until "buf" reaches 32 byte alignment */ + while ( ((unsigned long)buf) % 32 != 0 && nframes > 0) { + + /* Load the next double into the work buffer */ + work = _mm256_set1_pd(*buf); + current_min = _mm256_min_pd(current_min, work); + current_max = _mm256_max_pd(current_max, work); + buf++; + nframes--; + } + + while (nframes >= 16) { + /* use 64 byte prefetch? */ + __builtin_prefetch(buf+64,0,0); /* for GCC 4.3.2+ */ + + work = _mm256_load_pd(buf); + current_min = _mm256_min_pd(current_min, work); + current_max = _mm256_max_pd(current_max, work); + buf += 4; + __builtin_prefetch(buf+64,0,0); /* for GCC 4.3.2+ */ + work = _mm256_load_pd(buf); + current_min = _mm256_min_pd(current_min, work); + current_max = _mm256_max_pd(current_max, work); + buf += 4; + __builtin_prefetch(buf+64,0,0); /* for GCC 4.3.2+ */ + work = _mm256_load_pd(buf); + current_min = _mm256_min_pd(current_min, work); + current_max = _mm256_max_pd(current_max, work); + buf += 4; + __builtin_prefetch(buf+64,0,0); /* for GCC 4.3.2+ */ + work = _mm256_load_pd(buf); + current_min = _mm256_min_pd(current_min, work); + current_max = _mm256_max_pd(current_max, work); + buf += 4; + nframes -= 16; + } + + /* work through aligned buffers */ + while (nframes >= 4) { + work = _mm256_load_pd(buf); + current_min = _mm256_min_pd(current_min, work); + current_max = _mm256_max_pd(current_max, work); + buf += 4; + nframes -= 4; + } + + /* work through the remainung values */ + while ( nframes > 0) { + work = _mm256_set1_pd(*buf); + current_min = _mm256_min_pd(current_min, work); + current_max = _mm256_max_pd(current_max, work); + buf++; + nframes--; + } + + /* find min & max value through shuffle tricks */ + + work = current_min; + work = _mm256_shuffle_pd(work, work, _MM_SHUFFLE(2, 3, 0, 1)); + work = _mm256_min_pd (work, current_min); + current_min = work; + work = _mm256_shuffle_pd(work, work, _MM_SHUFFLE(1, 0, 3, 2)); + work = _mm256_min_pd (work, current_min); + _mm256_store_pd(min, work); + work = current_max; + work = _mm256_shuffle_pd(work, work, _MM_SHUFFLE(2, 3, 0, 1)); + work = _mm256_max_pd (work, current_max); + current_max = work; + work = _mm256_shuffle_pd(work, work, _MM_SHUFFLE(1, 0, 3, 2)); + work = _mm256_max_pd (work, current_max); + _mm256_store_pd(max, work); + + return; +} + +#else + +#ifdef __SSE2__ +static +void sse2_minmax_val(const double *restrict buf, long nframes, double *min, double *max) +{ + __m128d current_max, current_min, work; + + /* load starting max and min values into all slots of the XMM registers */ + current_min = _mm_set1_pd(*min); + current_max = _mm_set1_pd(*max); + + /* work on input until buf reaches 16 byte alignment */ + while ( ((unsigned long)buf) % 16 != 0 && nframes > 0) { + + /* load one double and replicate */ + work = _mm_set1_pd(*buf); + current_min = _mm_min_pd(current_min, work); + current_max = _mm_max_pd(current_max, work); + buf++; + nframes--; + } + + while (nframes >= 8) { + /* use 64 byte prefetch for double octetts */ + __builtin_prefetch(buf+64,0,0); /* for GCC 4.3.2 + */ + + work = _mm_load_pd(buf); + current_min = _mm_min_pd(current_min, work); + current_max = _mm_max_pd(current_max, work); + buf += 2; + work = _mm_load_pd(buf); + current_min = _mm_min_pd(current_min, work); + current_max = _mm_max_pd(current_max, work); + buf += 2; + work = _mm_load_pd(buf); + current_min = _mm_min_pd(current_min, work); + current_max = _mm_max_pd(current_max, work); + buf += 2; + work = _mm_load_pd(buf); + current_min = _mm_min_pd(current_min, work); + current_max = _mm_max_pd(current_max, work); + buf += 2; + nframes -= 8; + } + + /* work through smaller chunks of aligned buffers without prefetching */ + while (nframes >= 2) { + work = _mm_load_pd(buf); + current_min = _mm_min_pd(current_min, work); + current_max = _mm_max_pd(current_max, work); + buf += 2; + nframes -= 2; + } + + /* work through the remaining value */ + while ( nframes > 0) { + /* load the last double and replicate */ + work = _mm_set1_pd(*buf); + current_min = _mm_min_pd(current_min, work); + current_max = _mm_max_pd(current_max, work); + buf++; + nframes--; + } + + /* find final min and max value through shuffle tricks */ + work = current_min; + work = _mm_shuffle_pd(work, work, _MM_SHUFFLE2(0, 1)); + work = _mm_min_pd (work, current_min); + _mm_store_sd(min, work); + work = current_max; + work = _mm_shuffle_pd(work, work, _MM_SHUFFLE2(0, 1)); + work = _mm_max_pd (work, current_max); + _mm_store_sd(max, work); + + return; +} +#endif + +#endif + +static +void minmax_val(const double *restrict data, long datasize, double *fmin, double *fmax) +{ +#ifdef _GET_X86_COUNTER + uint64_t start_minmax, end_minmax; +#endif + + if ( datasize < 1 ) return; + +#ifdef _GET_X86_COUNTER + start_minmax = _rdtsc(); +#endif + +#ifdef __AVX__ + + avx_minmax_val(data, datasize, fmin, fmax); + +#else + + +#ifdef __SSE2__ + + sse2_minmax_val(data, datasize, fmin, fmax); + +#else + +#ifdef _ARCH_PWR6 +#define __UNROLL_DEPTH_1 6 + + /* to allow pipelining we have to unroll */ + +#ifdef _GET_IBM_COUNTER + hpmStart(1, "minmax fsel"); +#endif + { + long i, j; + long residual = datasize % __UNROLL_DEPTH_1; + long ofs = datasize - residual; + double register dmin[__UNROLL_DEPTH_1]; + double register dmax[__UNROLL_DEPTH_1]; + + for ( j = 0; j < __UNROLL_DEPTH_1; j++) + { + dmin[j] = data[0]; + dmax[j] = data[0]; + } + + for ( i = 0; i < datasize - residual; i += __UNROLL_DEPTH_1 ) + { + for (j = 0; j < __UNROLL_DEPTH_1; j++) + { + dmin[j] = __fsel(dmin[j] - data[i+j], data[i+j], dmin[j]); + dmax[j] = __fsel(data[i+j] - dmax[j], data[i+j], dmax[j]); + } + } + + for (j = 0; j < residual; j++) + { + dmin[j] = __fsel(dmin[j] - data[ofs+j], data[ofs+j], dmin[j]); + dmax[j] = __fsel(data[ofs+j] - dmax[j], data[ofs+j], dmax[j]); + } + + for ( j = 0; j < __UNROLL_DEPTH_1; j++) + { + *fmin = __fsel(*fmin - dmin[j], dmin[j], *fmin); + *fmax = __fsel(dmax[j] - *fmax, dmax[j], *fmax); + } + } +#ifdef _GET_IBM_COUNTER + hpmStop(1); +#endif + +#undef __UNROLL_DEPTH_1 +#else + +#ifdef _GET_IBM_COUNTER + hpmStart(1, "minmax base"); +#endif + { + long i; + +#if defined (CRAY) +#pragma _CRI ivdep +#elif defined (SX) +#pragma vdir nodep +#elif defined (__uxp__) +#pragma loop novrec +#endif + for ( i = 0; i < datasize; ++i ) + { + if ( *fmin > data[i] ) *fmin = data[i]; + if ( *fmax < data[i] ) *fmax = data[i]; + /* + *fmin = *fmin < data[i] ? *fmin : data[i]; + *fmax = *fmax > data[i] ? *fmax : data[i]; + */ + } + } +#ifdef _GET_IBM_COUNTER + hpmStop(1); +#endif + +#endif +#endif +#endif + +#ifdef _GET_X86_COUNTER + end_minmax = _rdtsc(); +#ifdef __AVX__ + printf("AVX cycles:: %" PRIu64 "\n", + end_minmax-start_minmax); +#else +#ifdef __SSE2__ + printf("SSE2 cycles:: %" PRIu64 "\n", + end_minmax-start_minmax); +#else + printf("loop cycles:: %" PRIu64 "\n", + end_minmax-start_minmax); +#endif +#endif +#endif + + return; +}