mirror of https://github.com/ecmwf/eccodes.git
GRIB-153: gribapi patch to speed up encoding on IBM P6/7
This commit is contained in:
parent
cf3a27b03b
commit
7175b9c664
8
NOTICE
8
NOTICE
|
@ -1,7 +1,11 @@
|
|||
ECMWF GRIB API
|
||||
Copyright 2005-2013 ECMWF.
|
||||
|
||||
This product includes software developed at ECMWF (http://www.ecmwf.int).
|
||||
This product includes software developed at
|
||||
ECMWF (http://www.ecmwf.int).
|
||||
|
||||
Parts of the definitions provided by WMO (http://www.wmo.int/pages/index_en.html)
|
||||
Parts of the definitions provided by
|
||||
WMO (http://www.wmo.int/pages/index_en.html)
|
||||
|
||||
IBM POWER optimisations were developed at
|
||||
The Max-Planck-Institute for Meteorology.
|
||||
|
|
13
configure.ac
13
configure.ac
|
@ -89,6 +89,19 @@ fi
|
|||
AC_DEFINE_UNQUOTED(GRIB_PTHREADS,$GRIB_PTHREADS,1->pthreads enabled 0->pthreads disabled)
|
||||
AC_DEFINE_UNQUOTED(GRIB_LINUX_PTHREADS,$GRIB_LINUX_PTHREADS,1->pthreads enabled 0->pthreads disabled)
|
||||
|
||||
dnl check IBM POWER 6/7 optimisations option
|
||||
AC_ARG_ENABLE([ibmpower67_opt],
|
||||
[AS_HELP_STRING([--enable-ibmpower67_opt],[enable IBM POWER 6/7 optimisations [by default disabled]])],
|
||||
[ibmpower67_opts=${enableval}] , [ibmpower67_opts=no]
|
||||
)
|
||||
if test "x${ibmpower67_opts}" = xyes; then
|
||||
GRIB_IBMPOWER67_OPT=1
|
||||
else
|
||||
GRIB_IBMPOWER67_OPT=0
|
||||
fi
|
||||
AC_DEFINE_UNQUOTED(GRIB_IBMPOWER67_OPT,$GRIB_IBMPOWER67_OPT,1->IBM Power6/7 Optimisations enabled 0->IBM Power6/7 Optimisations disabled)
|
||||
|
||||
|
||||
dnl check on uppercase fortran modules not working to be fixed
|
||||
dnl some fortran compilers change the name of the .mod file in upper case!
|
||||
ac_cv_prog_f90_uppercase_mod=no
|
||||
|
|
|
@ -302,33 +302,34 @@ list( APPEND grib_api_srcs
|
|||
grib_windef.h
|
||||
)
|
||||
|
||||
# list( APPEND grib_api_extra_srcs
|
||||
# grib_bits_any_endian.c
|
||||
# grib_bits_any_endian_simple.c
|
||||
# )
|
||||
list( APPEND grib_api_extra_srcs
|
||||
grib_bits_fast_big_endian.c grib_bits_any_endian.c
|
||||
grib_bits_fast_big_endian_vector.c grib_bits_any_endian_vector.c
|
||||
grib_bits_fast_big_endian_simple.c grib_bits_any_endian_simple.c
|
||||
grib_bits_fast_big_endian_omp.c grib_bits_any_endian_omp.c
|
||||
encode_double_array.c
|
||||
grib_bits_ibmpow.c
|
||||
grib_bits_ibmpow_opt.c
|
||||
minmax_val.c
|
||||
)
|
||||
|
||||
configure_file( grib_api_version.c.in grib_api_version.c @ONLY )
|
||||
|
||||
ecbuild_generate_yy( YYPREFIX grib_yy
|
||||
YACC griby
|
||||
LEX gribl
|
||||
DEPENDANT action.c )
|
||||
ecbuild_generate_yy(YYPREFIX grib_yy
|
||||
YACC griby
|
||||
LEX gribl
|
||||
DEPENDANT action.c )
|
||||
|
||||
ecbuild_add_library( TARGET grib_api
|
||||
SOURCES
|
||||
grib_api_version.c
|
||||
griby.c gribl.c
|
||||
${grib_api_extra_srcs}
|
||||
${grib_api_srcs}
|
||||
GENERATED
|
||||
grib_api_version.c
|
||||
INCLUDES
|
||||
${GRIB_API_EXTRA_INCLUDE_DIRS}
|
||||
LIBS
|
||||
${GRIB_API_EXTRA_LIBRARIES}
|
||||
${CMATH_LIBRARIES} )
|
||||
ecbuild_add_library(TARGET grib_api
|
||||
SOURCES grib_api_version.c
|
||||
griby.c gribl.c
|
||||
${grib_api_srcs}
|
||||
GENERATED grib_api_version.c
|
||||
INCLUDES ${GRIB_API_EXTRA_INCLUDE_DIRS}
|
||||
LIBS ${GRIB_API_EXTRA_LIBRARIES}
|
||||
TEMPLATES ${grib_api_extra_srcs}
|
||||
${CMATH_LIBRARIES} )
|
||||
|
||||
|
||||
# TODO: add support for jpg and png
|
||||
|
||||
install( FILES grib_api.h DESTINATION include )
|
||||
|
|
|
@ -332,7 +332,8 @@ EXTRA_DIST= dummy.am griby.y gribl.l grib_accessor_classes_hash.c \
|
|||
grib_bits_fast_big_endian.c grib_bits_any_endian.c \
|
||||
grib_bits_fast_big_endian_vector.c grib_bits_any_endian_vector.c \
|
||||
grib_bits_fast_big_endian_simple.c grib_bits_any_endian_simple.c \
|
||||
grib_bits_fast_big_endian_omp.c grib_bits_any_endian_omp.c
|
||||
grib_bits_fast_big_endian_omp.c grib_bits_any_endian_omp.c \
|
||||
encode_double_array.c grib_bits_ibmpow.c grib_bits_ibmpow_opt.c minmax_val.c
|
||||
|
||||
include $(DEVEL_RULES)
|
||||
|
||||
|
|
|
@ -0,0 +1,427 @@
|
|||
/*
|
||||
* Copyright 2005-2013 ECMWF.
|
||||
*
|
||||
* This software is licensed under the terms of the Apache Licence Version 2.0
|
||||
* which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
||||
*
|
||||
* In applying this licence, ECMWF does not waive the privileges and immunities granted to it by
|
||||
* virtue of its status as an intergovernmental organisation nor does it submit to any jurisdiction.
|
||||
*/
|
||||
|
||||
|
||||
#ifdef _GET_IBM_COUNTER
|
||||
#include <libhpc.h>
|
||||
#endif
|
||||
|
||||
|
||||
static
|
||||
void encode_double_array_common(int numBits, long packStart, long datasize, GRIBPACK *lGrib,
|
||||
const double *data, double zref, double factor, long *gz)
|
||||
{
|
||||
long i, z = *gz;
|
||||
unsigned int ival;
|
||||
int cbits, jbits;
|
||||
unsigned int c;
|
||||
static unsigned int mask[] = {0,1,3,7,15,31,63,127,255};
|
||||
|
||||
/* code from gribw routine flist2bitstream */
|
||||
|
||||
cbits = 8;
|
||||
c = 0;
|
||||
for ( i = packStart; i < datasize; i++ )
|
||||
{
|
||||
/* note float -> unsigned int .. truncate */
|
||||
ival = (unsigned int) ((data[i] - zref) * factor + 0.5);
|
||||
/*
|
||||
if ( ival > max_nbpv_pow2 ) ival = max_nbpv_pow2;
|
||||
if ( ival < 0 ) ival = 0;
|
||||
*/
|
||||
jbits = numBits;
|
||||
while ( cbits <= jbits )
|
||||
{
|
||||
if ( cbits == 8 )
|
||||
{
|
||||
jbits -= 8;
|
||||
lGrib[z++] = (ival >> jbits) & 0xFF;
|
||||
}
|
||||
else
|
||||
{
|
||||
jbits -= cbits;
|
||||
lGrib[z++] = (c << cbits) + ((ival >> jbits) & mask[cbits]);
|
||||
cbits = 8;
|
||||
c = 0;
|
||||
}
|
||||
}
|
||||
/* now jbits < cbits */
|
||||
if ( jbits )
|
||||
{
|
||||
c = (c << jbits) + (ival & mask[jbits]);
|
||||
cbits -= jbits;
|
||||
}
|
||||
}
|
||||
if ( cbits != 8 ) lGrib[z++] = c << cbits;
|
||||
|
||||
*gz = z;
|
||||
}
|
||||
|
||||
static
|
||||
void encode_double_array_byte(int numBits, long packStart, long datasize,
|
||||
GRIBPACK *restrict lGrib,
|
||||
const double *restrict data,
|
||||
double zref, double factor, long *restrict gz)
|
||||
{
|
||||
long i, z = *gz;
|
||||
unsigned long ival;
|
||||
double tmp;
|
||||
|
||||
data += packStart;
|
||||
datasize -= packStart;
|
||||
|
||||
if ( numBits == 8 )
|
||||
{
|
||||
#ifdef _GET_IBM_COUNTER
|
||||
hpmStart(2, "pack 8 bit base");
|
||||
#endif
|
||||
#if defined (CRAY)
|
||||
#pragma _CRI ivdep
|
||||
#elif defined (SX)
|
||||
#pragma vdir nodep
|
||||
#elif defined (__uxp__)
|
||||
#pragma loop novrec
|
||||
#endif
|
||||
for ( i = 0; i < datasize; i++ )
|
||||
{
|
||||
tmp = ((data[i] - zref) * factor + 0.5);
|
||||
ival = (unsigned long) tmp;
|
||||
lGrib[z ] = ival;
|
||||
z++;
|
||||
}
|
||||
#ifdef _GET_IBM_COUNTER
|
||||
hpmStop(2);
|
||||
#endif
|
||||
}
|
||||
else if ( numBits == 16 )
|
||||
{
|
||||
#ifdef _GET_IBM_COUNTER
|
||||
hpmStart(3, "pack 16 bit base");
|
||||
#endif
|
||||
#if defined (CRAY)
|
||||
#pragma _CRI ivdep
|
||||
#elif defined (SX)
|
||||
#pragma vdir nodep
|
||||
#elif defined (__uxp__)
|
||||
#pragma loop novrec
|
||||
#endif
|
||||
for ( i = 0; i < datasize; i++ )
|
||||
{
|
||||
tmp = ((data[i] - zref) * factor + 0.5);
|
||||
ival = (unsigned long) tmp;
|
||||
lGrib[z ] = ival >> 8;
|
||||
lGrib[z+1] = ival;
|
||||
z += 2;
|
||||
}
|
||||
#ifdef _GET_IBM_COUNTER
|
||||
hpmStop(3);
|
||||
#endif
|
||||
}
|
||||
else if ( numBits == 24 )
|
||||
{
|
||||
#ifdef _GET_IBM_COUNTER
|
||||
hpmStart(4, "pack 24 bit base");
|
||||
#endif
|
||||
#if defined (CRAY)
|
||||
#pragma _CRI ivdep
|
||||
#elif defined (SX)
|
||||
#pragma vdir nodep
|
||||
#elif defined (__uxp__)
|
||||
#pragma loop novrec
|
||||
#endif
|
||||
for ( i = 0; i < datasize; i++ )
|
||||
{
|
||||
tmp = ((data[i] - zref) * factor + 0.5);
|
||||
ival = (unsigned long) tmp;
|
||||
lGrib[z ] = ival >> 16;
|
||||
lGrib[z+1] = ival >> 8;
|
||||
lGrib[z+2] = ival;
|
||||
z += 3;
|
||||
}
|
||||
#ifdef _GET_IBM_COUNTER
|
||||
hpmStop(4);
|
||||
#endif
|
||||
}
|
||||
else if ( numBits == 32 )
|
||||
{
|
||||
#ifdef _GET_IBM_COUNTER
|
||||
hpmStart(5, "pack 32 bit base");
|
||||
#endif
|
||||
#if defined (CRAY)
|
||||
#pragma _CRI ivdep
|
||||
#elif defined (SX)
|
||||
#pragma vdir nodep
|
||||
#elif defined (__uxp__)
|
||||
#pragma loop novrec
|
||||
#endif
|
||||
for ( i = 0; i < datasize; i++ )
|
||||
{
|
||||
tmp = ((data[i] - zref) * factor + 0.5);
|
||||
ival = (unsigned long) tmp;
|
||||
lGrib[z ] = ival >> 24;
|
||||
lGrib[z+1] = ival >> 16;
|
||||
lGrib[z+2] = ival >> 8;
|
||||
lGrib[z+3] = ival;
|
||||
z += 4;
|
||||
}
|
||||
#ifdef _GET_IBM_COUNTER
|
||||
hpmStop(5);
|
||||
#endif
|
||||
}
|
||||
else if ( numBits > 0 && numBits <= 32 )
|
||||
{
|
||||
encode_double_array_common(numBits, 0, datasize, lGrib,
|
||||
data, zref, factor, &z);
|
||||
}
|
||||
else if ( numBits == 0 )
|
||||
{
|
||||
}
|
||||
else
|
||||
{
|
||||
Error("Unimplemented packing factor %d!", numBits);
|
||||
}
|
||||
|
||||
*gz = z;
|
||||
}
|
||||
|
||||
static
|
||||
void encode_double_array_unrolled(int numBits, long packStart, long datasize,
|
||||
GRIBPACK *restrict lGrib,
|
||||
const double *restrict data,
|
||||
double zref, double factor, long *restrict gz)
|
||||
{
|
||||
U_BYTEORDER;
|
||||
long i, j, z = *gz;
|
||||
double tmp;
|
||||
#ifdef _ARCH_PWR6
|
||||
#define __UNROLL_DEPTH_2 8
|
||||
#else
|
||||
#define __UNROLL_DEPTH_2 8
|
||||
#endif
|
||||
|
||||
data += packStart;
|
||||
datasize -= packStart;
|
||||
|
||||
{
|
||||
long residual = datasize % __UNROLL_DEPTH_2;
|
||||
long ofs = datasize - residual;
|
||||
double dval[__UNROLL_DEPTH_2];
|
||||
unsigned long ival;
|
||||
|
||||
/* reducing FP operations to single FMA is slowing down on pwr6 ... */
|
||||
|
||||
if ( numBits == 8 )
|
||||
{
|
||||
unsigned char *cgrib = (unsigned char *) (lGrib + z);
|
||||
#ifdef _GET_IBM_COUNTER
|
||||
hpmStart(2, "pack 8 bit unrolled");
|
||||
#endif
|
||||
for ( i = 0; i < datasize - residual; i += __UNROLL_DEPTH_2 )
|
||||
{
|
||||
for (j = 0; j < __UNROLL_DEPTH_2; j++)
|
||||
{
|
||||
dval[j] = ((data[i+j] - zref) * factor + 0.5);
|
||||
}
|
||||
for (j = 0; j < __UNROLL_DEPTH_2; j++)
|
||||
{
|
||||
*cgrib = (unsigned long) dval[j];
|
||||
cgrib++;
|
||||
z++;
|
||||
}
|
||||
}
|
||||
for (j = 0; j < residual; j++)
|
||||
{
|
||||
dval[j] = ((data[ofs+j] - zref) * factor + 0.5);
|
||||
}
|
||||
for (j = 0; j < residual; j++)
|
||||
{
|
||||
*cgrib = (unsigned long) dval[j];
|
||||
cgrib++;
|
||||
z++;
|
||||
}
|
||||
#ifdef _GET_IBM_COUNTER
|
||||
hpmStop(2);
|
||||
#endif
|
||||
}
|
||||
else if ( numBits == 16 )
|
||||
{
|
||||
unsigned short *sgrib = (unsigned short *) (lGrib + z);
|
||||
#ifdef _GET_IBM_COUNTER
|
||||
hpmStart(3, "pack 16 bit unrolled");
|
||||
#endif
|
||||
for ( i = 0; i < datasize - residual; i += __UNROLL_DEPTH_2 )
|
||||
{
|
||||
for (j = 0; j < __UNROLL_DEPTH_2; j++)
|
||||
{
|
||||
dval[j] = ((data[i+j] - zref) * factor + 0.5);
|
||||
}
|
||||
if ( IS_BIGENDIAN() )
|
||||
{
|
||||
for (j = 0; j < __UNROLL_DEPTH_2; j++)
|
||||
{
|
||||
*sgrib = (unsigned long) dval[j];
|
||||
sgrib++;
|
||||
z += 2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (j = 0; j < __UNROLL_DEPTH_2; j++)
|
||||
{
|
||||
ival = (unsigned long) dval[j];
|
||||
lGrib[z ] = ival >> 8;
|
||||
lGrib[z+1] = ival;
|
||||
z += 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (j = 0; j < residual; j++)
|
||||
{
|
||||
dval[j] = ((data[ofs+j] - zref) * factor + 0.5);
|
||||
}
|
||||
if ( IS_BIGENDIAN() )
|
||||
{
|
||||
for (j = 0; j < residual; j++)
|
||||
{
|
||||
*sgrib = (unsigned long) dval[j];
|
||||
sgrib++;
|
||||
z += 2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (j = 0; j < residual; j++)
|
||||
{
|
||||
ival = (unsigned long) dval[j];
|
||||
lGrib[z ] = ival >> 8;
|
||||
lGrib[z+1] = ival;
|
||||
z += 2;
|
||||
}
|
||||
}
|
||||
#ifdef _GET_IBM_COUNTER
|
||||
hpmStop(3);
|
||||
#endif
|
||||
}
|
||||
else if ( numBits == 24 )
|
||||
{
|
||||
#ifdef _GET_IBM_COUNTER
|
||||
hpmStart(4, "pack 24 bit unrolled");
|
||||
#endif
|
||||
for ( i = 0; i < datasize - residual; i += __UNROLL_DEPTH_2 )
|
||||
{
|
||||
for (j = 0; j < __UNROLL_DEPTH_2; j++)
|
||||
{
|
||||
dval[j] = ((data[i+j] - zref) * factor + 0.5);
|
||||
}
|
||||
for (j = 0; j < __UNROLL_DEPTH_2; j++)
|
||||
{
|
||||
ival = (unsigned long) dval[j];
|
||||
lGrib[z ] = ival >> 16;
|
||||
lGrib[z+1] = ival >> 8;
|
||||
lGrib[z+2] = ival;
|
||||
z += 3;
|
||||
}
|
||||
}
|
||||
for (j = 0; j < residual; j++)
|
||||
{
|
||||
dval[j] = ((data[ofs+j] - zref) * factor + 0.5);
|
||||
}
|
||||
for (j = 0; j < residual; j++)
|
||||
{
|
||||
ival = (unsigned long) dval[j];
|
||||
lGrib[z ] = ival >> 16;
|
||||
lGrib[z+1] = ival >> 8;
|
||||
lGrib[z+2] = ival;
|
||||
z += 3;
|
||||
}
|
||||
#ifdef _GET_IBM_COUNTER
|
||||
hpmStop(4);
|
||||
#endif
|
||||
}
|
||||
else if ( numBits == 32 )
|
||||
{
|
||||
#ifdef _GET_IBM_COUNTER
|
||||
hpmStart(5, "pack 32 bit unrolled");
|
||||
#endif
|
||||
unsigned int *igrib = (unsigned int *) (lGrib + z);
|
||||
for ( i = 0; i < datasize - residual; i += __UNROLL_DEPTH_2 )
|
||||
{
|
||||
for (j = 0; j < __UNROLL_DEPTH_2; j++)
|
||||
{
|
||||
dval[j] = ((data[i+j] - zref) * factor + 0.5);
|
||||
}
|
||||
if ( IS_BIGENDIAN() )
|
||||
{
|
||||
for (j = 0; j < __UNROLL_DEPTH_2; j++)
|
||||
{
|
||||
*igrib = (unsigned long) dval[j];
|
||||
igrib++;
|
||||
z += 4;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (j = 0; j < __UNROLL_DEPTH_2; j++)
|
||||
{
|
||||
ival = (unsigned long) dval[j];
|
||||
lGrib[z ] = ival >> 24;
|
||||
lGrib[z+1] = ival >> 16;
|
||||
lGrib[z+2] = ival >> 8;
|
||||
lGrib[z+3] = ival;
|
||||
z += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (j = 0; j < residual; j++)
|
||||
{
|
||||
dval[j] = ((data[ofs+j] - zref) * factor + 0.5);
|
||||
}
|
||||
if ( IS_BIGENDIAN() )
|
||||
{
|
||||
for (j = 0; j < residual; j++)
|
||||
{
|
||||
*igrib = (unsigned long) dval[j];
|
||||
igrib++;
|
||||
z += 4;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (j = 0; j < residual; j++)
|
||||
{
|
||||
ival = (unsigned long) dval[j];
|
||||
lGrib[z ] = ival >> 24;
|
||||
lGrib[z+1] = ival >> 16;
|
||||
lGrib[z+2] = ival >> 8;
|
||||
lGrib[z+3] = ival;
|
||||
z += 4;
|
||||
}
|
||||
}
|
||||
#ifdef _GET_IBM_COUNTER
|
||||
hpmStop(5);
|
||||
#endif
|
||||
}
|
||||
else if ( numBits > 0 && numBits <= 32 )
|
||||
{
|
||||
encode_double_array_common(numBits, 0, datasize, lGrib,
|
||||
data, zref, factor, &z);
|
||||
}
|
||||
else if ( numBits == 0 )
|
||||
{
|
||||
}
|
||||
else
|
||||
{
|
||||
Error("Unimplemented packing factor %d!", numBits);
|
||||
}
|
||||
}
|
||||
|
||||
*gz = z;
|
||||
#undef __UNROLL_DEPTH_2
|
||||
}
|
|
@ -156,27 +156,27 @@ static void init_class(grib_accessor_class* c)
|
|||
|
||||
static void init(grib_accessor* a,const long v, grib_arguments* args)
|
||||
{
|
||||
grib_accessor_data_simple_packing *self =(grib_accessor_data_simple_packing*)a;
|
||||
self->units_factor = grib_arguments_get_name(a->parent->h,args,self->carg++);
|
||||
self->units_bias = grib_arguments_get_name(a->parent->h,args,self->carg++);
|
||||
self->changing_precision = grib_arguments_get_name(a->parent->h,args,self->carg++);
|
||||
self->number_of_values = grib_arguments_get_name(a->parent->h,args,self->carg++);
|
||||
self->bits_per_value = grib_arguments_get_name(a->parent->h,args,self->carg++);
|
||||
self->reference_value = grib_arguments_get_name(a->parent->h,args,self->carg++);
|
||||
self->binary_scale_factor = grib_arguments_get_name(a->parent->h,args,self->carg++);
|
||||
self->decimal_scale_factor = grib_arguments_get_name(a->parent->h,args,self->carg++);
|
||||
a->flags |= GRIB_ACCESSOR_FLAG_DATA;
|
||||
self->dirty=1;
|
||||
grib_accessor_data_simple_packing *self =(grib_accessor_data_simple_packing*)a;
|
||||
self->units_factor = grib_arguments_get_name(a->parent->h,args,self->carg++);
|
||||
self->units_bias = grib_arguments_get_name(a->parent->h,args,self->carg++);
|
||||
self->changing_precision = grib_arguments_get_name(a->parent->h,args,self->carg++);
|
||||
self->number_of_values = grib_arguments_get_name(a->parent->h,args,self->carg++);
|
||||
self->bits_per_value = grib_arguments_get_name(a->parent->h,args,self->carg++);
|
||||
self->reference_value = grib_arguments_get_name(a->parent->h,args,self->carg++);
|
||||
self->binary_scale_factor = grib_arguments_get_name(a->parent->h,args,self->carg++);
|
||||
self->decimal_scale_factor = grib_arguments_get_name(a->parent->h,args,self->carg++);
|
||||
a->flags |= GRIB_ACCESSOR_FLAG_DATA;
|
||||
self->dirty=1;
|
||||
|
||||
}
|
||||
|
||||
static unsigned long nbits[32]={
|
||||
0x1, 0x2, 0x4, 0x8, 0x10, 0x20,
|
||||
0x40, 0x80, 0x100, 0x200, 0x400, 0x800,
|
||||
0x1000, 0x2000, 0x4000, 0x8000, 0x10000, 0x20000,
|
||||
0x40000, 0x80000, 0x100000, 0x200000, 0x400000, 0x800000,
|
||||
0x1000000, 0x2000000, 0x4000000, 0x8000000, 0x10000000, 0x20000000,
|
||||
0x40000000, 0x80000000
|
||||
0x1, 0x2, 0x4, 0x8, 0x10, 0x20,
|
||||
0x40, 0x80, 0x100, 0x200, 0x400, 0x800,
|
||||
0x1000, 0x2000, 0x4000, 0x8000, 0x10000, 0x20000,
|
||||
0x40000, 0x80000, 0x100000, 0x200000, 0x400000, 0x800000,
|
||||
0x1000000, 0x2000000, 0x4000000, 0x8000000, 0x10000000, 0x20000000,
|
||||
0x40000000, 0x80000000
|
||||
};
|
||||
|
||||
GRIB_INLINE static long number_of_bits(unsigned long x) {
|
||||
|
@ -188,459 +188,466 @@ GRIB_INLINE static long number_of_bits(unsigned long x) {
|
|||
|
||||
|
||||
static long value_count(grib_accessor* a){
|
||||
grib_accessor_data_simple_packing* self = (grib_accessor_data_simple_packing*)a;
|
||||
grib_accessor_data_simple_packing* self = (grib_accessor_data_simple_packing*)a;
|
||||
|
||||
long number_of_values;
|
||||
long number_of_values;
|
||||
|
||||
if(grib_get_long_internal(a->parent->h,self->number_of_values,&number_of_values) != GRIB_SUCCESS)
|
||||
return 0;
|
||||
|
||||
return number_of_values;
|
||||
|
||||
if(grib_get_long_internal(a->parent->h,self->number_of_values,&number_of_values) != GRIB_SUCCESS)
|
||||
return 0;
|
||||
|
||||
return number_of_values;
|
||||
|
||||
}
|
||||
|
||||
static int unpack_double_element(grib_accessor* a, size_t idx, double* val)
|
||||
{
|
||||
grib_accessor_data_simple_packing* self = (grib_accessor_data_simple_packing*)a;
|
||||
grib_accessor_data_simple_packing* self = (grib_accessor_data_simple_packing*)a;
|
||||
|
||||
size_t n_vals;
|
||||
int err = 0;
|
||||
size_t n_vals;
|
||||
int err = 0;
|
||||
|
||||
double reference_value;
|
||||
long binary_scale_factor;
|
||||
long bits_per_value;
|
||||
long decimal_scale_factor;
|
||||
unsigned char* buf = (unsigned char*)a->parent->h->buffer->data;
|
||||
double s = 0;
|
||||
double d = 0;
|
||||
long pos = 0;
|
||||
size_t o = 0;
|
||||
double reference_value;
|
||||
long binary_scale_factor;
|
||||
long bits_per_value;
|
||||
long decimal_scale_factor;
|
||||
unsigned char* buf = (unsigned char*)a->parent->h->buffer->data;
|
||||
double s = 0;
|
||||
double d = 0;
|
||||
long pos = 0;
|
||||
size_t o = 0;
|
||||
|
||||
n_vals = grib_value_count(a);
|
||||
if(n_vals==0){
|
||||
return GRIB_NOT_FOUND;
|
||||
}
|
||||
|
||||
if((err = grib_get_long_internal(a->parent->h,self->bits_per_value,&bits_per_value)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
self->dirty=0;
|
||||
|
||||
if((err = grib_get_double_internal(a->parent->h,self->reference_value, &reference_value)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
if((err = grib_get_long_internal(a->parent->h,self->binary_scale_factor, &binary_scale_factor))
|
||||
!= GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
if((err = grib_get_long_internal(a->parent->h,self->decimal_scale_factor, &decimal_scale_factor))
|
||||
!= GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
/* Special case */
|
||||
|
||||
if (bits_per_value == 0) {
|
||||
*val=reference_value;
|
||||
return GRIB_SUCCESS;
|
||||
}
|
||||
|
||||
s = grib_power(binary_scale_factor,2);
|
||||
d = grib_power(-decimal_scale_factor,10) ;
|
||||
|
||||
grib_context_log(a->parent->h->context, GRIB_LOG_DEBUG,
|
||||
"grib_accessor_data_simple_packing : unpack_double : creating %s, %d values",
|
||||
a->name, n_vals);
|
||||
|
||||
buf += grib_byte_offset(a);
|
||||
|
||||
Assert(((bits_per_value*n_vals)/8) < (1<<29));
|
||||
/*ensure that the bit pointer is not overflown*/
|
||||
|
||||
if(bits_per_value%8)
|
||||
{
|
||||
grib_context_log(a->parent->h->context, GRIB_LOG_DEBUG,
|
||||
"unpack_double : calling outline function : bpv %d, rv : %g, sf : %d, dsf : %d ",
|
||||
bits_per_value,reference_value,binary_scale_factor, decimal_scale_factor);
|
||||
pos=idx*bits_per_value;
|
||||
*val= (double) (((
|
||||
grib_decode_unsigned_long(buf, &pos, bits_per_value)*s)+reference_value)*d);
|
||||
/* val[i] = grib_decode_unsigned_long(buf, &pos, bits_per_value); */
|
||||
/* fprintf(stdout,"unpck uuu-o: %d vals %d bitspv buf %d by long \n", n_vals, bits_per_value, pos/8);*/
|
||||
}
|
||||
else
|
||||
{
|
||||
int bc;
|
||||
long lvalue = 0;
|
||||
int l = bits_per_value/8;
|
||||
|
||||
pos=idx*l;
|
||||
buf+=pos;
|
||||
lvalue = 0;
|
||||
lvalue <<= 8;
|
||||
lvalue |= buf[o++] ;
|
||||
|
||||
for ( bc=1; bc<l; bc++ ) {
|
||||
lvalue <<= 8;
|
||||
lvalue |= buf[o++] ;
|
||||
n_vals = grib_value_count(a);
|
||||
if(n_vals==0){
|
||||
return GRIB_NOT_FOUND;
|
||||
}
|
||||
*val = (double) (((lvalue*s)+reference_value)*d);
|
||||
|
||||
}
|
||||
if((err = grib_get_long_internal(a->parent->h,self->bits_per_value,&bits_per_value)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
return err;
|
||||
self->dirty=0;
|
||||
|
||||
if((err = grib_get_double_internal(a->parent->h,self->reference_value, &reference_value)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
if((err = grib_get_long_internal(a->parent->h,self->binary_scale_factor, &binary_scale_factor))
|
||||
!= GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
if((err = grib_get_long_internal(a->parent->h,self->decimal_scale_factor, &decimal_scale_factor))
|
||||
!= GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
/* Special case */
|
||||
|
||||
if (bits_per_value == 0) {
|
||||
*val=reference_value;
|
||||
return GRIB_SUCCESS;
|
||||
}
|
||||
|
||||
s = grib_power(binary_scale_factor,2);
|
||||
d = grib_power(-decimal_scale_factor,10) ;
|
||||
|
||||
grib_context_log(a->parent->h->context, GRIB_LOG_DEBUG,
|
||||
"grib_accessor_data_simple_packing : unpack_double : creating %s, %d values",
|
||||
a->name, n_vals);
|
||||
|
||||
buf += grib_byte_offset(a);
|
||||
|
||||
Assert(((bits_per_value*n_vals)/8) < (1<<29));
|
||||
/*ensure that the bit pointer is not overflown*/
|
||||
|
||||
if(bits_per_value%8)
|
||||
{
|
||||
grib_context_log(a->parent->h->context, GRIB_LOG_DEBUG,
|
||||
"unpack_double : calling outline function : bpv %d, rv : %g, sf : %d, dsf : %d ",
|
||||
bits_per_value,reference_value,binary_scale_factor, decimal_scale_factor);
|
||||
pos=idx*bits_per_value;
|
||||
*val= (double) (((
|
||||
grib_decode_unsigned_long(buf, &pos, bits_per_value)*s)+reference_value)*d);
|
||||
/* val[i] = grib_decode_unsigned_long(buf, &pos, bits_per_value); */
|
||||
/* fprintf(stdout,"unpck uuu-o: %d vals %d bitspv buf %d by long \n", n_vals, bits_per_value, pos/8);*/
|
||||
}
|
||||
else
|
||||
{
|
||||
int bc;
|
||||
long lvalue = 0;
|
||||
int l = bits_per_value/8;
|
||||
|
||||
pos=idx*l;
|
||||
buf+=pos;
|
||||
lvalue = 0;
|
||||
lvalue <<= 8;
|
||||
lvalue |= buf[o++] ;
|
||||
|
||||
for ( bc=1; bc<l; bc++ ) {
|
||||
lvalue <<= 8;
|
||||
lvalue |= buf[o++] ;
|
||||
}
|
||||
*val = (double) (((lvalue*s)+reference_value)*d);
|
||||
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int _unpack_double(grib_accessor* a, double* val, size_t *len,unsigned char* buf,long pos, size_t n_vals)
|
||||
{
|
||||
grib_accessor_data_simple_packing* self = (grib_accessor_data_simple_packing*)a;
|
||||
grib_accessor_data_simple_packing* self = (grib_accessor_data_simple_packing*)a;
|
||||
|
||||
size_t i = 0;
|
||||
int err = 0;
|
||||
size_t i = 0;
|
||||
int err = 0;
|
||||
|
||||
double reference_value;
|
||||
long binary_scale_factor;
|
||||
long bits_per_value;
|
||||
long decimal_scale_factor;
|
||||
double s = 0;
|
||||
double d = 0;
|
||||
double units_factor=1.0;
|
||||
double units_bias=0.0;
|
||||
double reference_value;
|
||||
long binary_scale_factor;
|
||||
long bits_per_value;
|
||||
long decimal_scale_factor;
|
||||
double s = 0;
|
||||
double d = 0;
|
||||
double units_factor=1.0;
|
||||
double units_bias=0.0;
|
||||
|
||||
|
||||
if(*len < n_vals)
|
||||
{
|
||||
if(*len < n_vals)
|
||||
{
|
||||
*len = (long) n_vals;
|
||||
return GRIB_ARRAY_TOO_SMALL;
|
||||
}
|
||||
|
||||
if((err = grib_get_long_internal(a->parent->h,self->bits_per_value,&bits_per_value)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
/*/
|
||||
* check we don't decode bpv > max(ulong) as it is
|
||||
* not currently supported by the algorithm
|
||||
*/
|
||||
if ( bits_per_value > (sizeof(long)*8) ) {
|
||||
return GRIB_INVALID_BPV;
|
||||
}
|
||||
|
||||
if(self->units_factor &&
|
||||
(grib_get_double_internal(a->parent->h,self->units_factor,&units_factor)== GRIB_SUCCESS)) {
|
||||
grib_set_double_internal(a->parent->h,self->units_factor,1.0);
|
||||
}
|
||||
|
||||
if(self->units_bias &&
|
||||
(grib_get_double_internal(a->parent->h,self->units_bias,&units_bias)== GRIB_SUCCESS)) {
|
||||
grib_set_double_internal(a->parent->h,self->units_bias,0.0);
|
||||
}
|
||||
|
||||
if(n_vals==0){
|
||||
*len = 0;
|
||||
return GRIB_SUCCESS;
|
||||
}
|
||||
|
||||
self->dirty=0;
|
||||
|
||||
if((err = grib_get_double_internal(a->parent->h,self->reference_value, &reference_value)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
if((err = grib_get_long_internal(a->parent->h,self->binary_scale_factor, &binary_scale_factor))
|
||||
!= GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
if((err = grib_get_long_internal(a->parent->h,self->decimal_scale_factor, &decimal_scale_factor))
|
||||
!= GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
/* Special case */
|
||||
|
||||
if(bits_per_value == 0)
|
||||
{
|
||||
for(i = 0; i < n_vals; i++)
|
||||
val[i] = reference_value;
|
||||
*len = n_vals;
|
||||
return GRIB_SUCCESS;
|
||||
}
|
||||
|
||||
s = grib_power(binary_scale_factor,2);
|
||||
d = grib_power(-decimal_scale_factor,10) ;
|
||||
|
||||
grib_context_log(a->parent->h->context, GRIB_LOG_DEBUG,
|
||||
"grib_accessor_data_simple_packing : unpack_double : creating %s, %d values",
|
||||
a->name, n_vals);
|
||||
|
||||
buf += grib_byte_offset(a);
|
||||
|
||||
Assert(((bits_per_value*n_vals)/8) < (1<<29));
|
||||
/*ensure that the bit pointer is not overflown*/
|
||||
|
||||
grib_context_log(a->parent->h->context, GRIB_LOG_DEBUG,
|
||||
"unpack_double : calling outline function : bpv %d, rv : %g, sf : %d, dsf : %d ",
|
||||
bits_per_value,reference_value,binary_scale_factor, decimal_scale_factor);
|
||||
grib_decode_double_array(buf,&pos,bits_per_value,reference_value,s,d,n_vals,val);
|
||||
|
||||
*len = (long) n_vals;
|
||||
return GRIB_ARRAY_TOO_SMALL;
|
||||
}
|
||||
|
||||
if((err = grib_get_long_internal(a->parent->h,self->bits_per_value,&bits_per_value)) !=
|
||||
GRIB_SUCCESS)
|
||||
if (units_factor != 1.0) {
|
||||
if (units_bias != 0.0)
|
||||
for (i=0;i<n_vals;i++) val[i]=val[i]*units_factor+units_bias;
|
||||
else
|
||||
for (i=0;i<n_vals;i++) val[i]*=units_factor;
|
||||
} else if (units_bias != 0.0)
|
||||
for (i=0;i<n_vals;i++) val[i]+=units_bias;
|
||||
|
||||
return err;
|
||||
|
||||
/*/
|
||||
* check we don't decode bpv > max(ulong) as it is
|
||||
* not currently supported by the algorithm
|
||||
*/
|
||||
if ( bits_per_value > (sizeof(long)*8) ) {
|
||||
return GRIB_INVALID_BPV;
|
||||
}
|
||||
|
||||
if(self->units_factor &&
|
||||
(grib_get_double_internal(a->parent->h,self->units_factor,&units_factor)== GRIB_SUCCESS)) {
|
||||
grib_set_double_internal(a->parent->h,self->units_factor,1.0);
|
||||
}
|
||||
|
||||
if(self->units_bias &&
|
||||
(grib_get_double_internal(a->parent->h,self->units_bias,&units_bias)== GRIB_SUCCESS)) {
|
||||
grib_set_double_internal(a->parent->h,self->units_bias,0.0);
|
||||
}
|
||||
|
||||
if(n_vals==0){
|
||||
*len = 0;
|
||||
return GRIB_SUCCESS;
|
||||
}
|
||||
|
||||
self->dirty=0;
|
||||
|
||||
if((err = grib_get_double_internal(a->parent->h,self->reference_value, &reference_value)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
if((err = grib_get_long_internal(a->parent->h,self->binary_scale_factor, &binary_scale_factor))
|
||||
!= GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
if((err = grib_get_long_internal(a->parent->h,self->decimal_scale_factor, &decimal_scale_factor))
|
||||
!= GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
/* Special case */
|
||||
|
||||
if(bits_per_value == 0)
|
||||
{
|
||||
for(i = 0; i < n_vals; i++)
|
||||
val[i] = reference_value;
|
||||
*len = n_vals;
|
||||
return GRIB_SUCCESS;
|
||||
}
|
||||
|
||||
s = grib_power(binary_scale_factor,2);
|
||||
d = grib_power(-decimal_scale_factor,10) ;
|
||||
|
||||
grib_context_log(a->parent->h->context, GRIB_LOG_DEBUG,
|
||||
"grib_accessor_data_simple_packing : unpack_double : creating %s, %d values",
|
||||
a->name, n_vals);
|
||||
|
||||
buf += grib_byte_offset(a);
|
||||
|
||||
Assert(((bits_per_value*n_vals)/8) < (1<<29));
|
||||
/*ensure that the bit pointer is not overflown*/
|
||||
|
||||
grib_context_log(a->parent->h->context, GRIB_LOG_DEBUG,
|
||||
"unpack_double : calling outline function : bpv %d, rv : %g, sf : %d, dsf : %d ",
|
||||
bits_per_value,reference_value,binary_scale_factor, decimal_scale_factor);
|
||||
grib_decode_double_array(buf,&pos,bits_per_value,reference_value,s,d,n_vals,val);
|
||||
|
||||
*len = (long) n_vals;
|
||||
|
||||
if (units_factor != 1.0) {
|
||||
if (units_bias != 0.0)
|
||||
for (i=0;i<n_vals;i++) val[i]=val[i]*units_factor+units_bias;
|
||||
else
|
||||
for (i=0;i<n_vals;i++) val[i]*=units_factor;
|
||||
} else if (units_bias != 0.0)
|
||||
for (i=0;i<n_vals;i++) val[i]+=units_bias;
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int unpack_double_subarray(grib_accessor* a, double* val, size_t start, size_t len)
|
||||
{
|
||||
grib_accessor_data_simple_packing* self = (grib_accessor_data_simple_packing*)a;
|
||||
unsigned char* buf = (unsigned char*)a->parent->h->buffer->data;
|
||||
size_t nvals = len;
|
||||
size_t *plen=&len;
|
||||
long bits_per_value=0;
|
||||
long pos;
|
||||
int err;
|
||||
grib_accessor_data_simple_packing* self = (grib_accessor_data_simple_packing*)a;
|
||||
unsigned char* buf = (unsigned char*)a->parent->h->buffer->data;
|
||||
size_t nvals = len;
|
||||
size_t *plen=&len;
|
||||
long bits_per_value=0;
|
||||
long pos;
|
||||
int err;
|
||||
|
||||
if((err = grib_get_long_internal(a->parent->h,self->bits_per_value,&bits_per_value)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
if((err = grib_get_long_internal(a->parent->h,self->bits_per_value,&bits_per_value)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
buf+=(start*bits_per_value)/8;
|
||||
pos=start*bits_per_value%8;
|
||||
return _unpack_double(a,val,plen,buf,pos,nvals);
|
||||
buf+=(start*bits_per_value)/8;
|
||||
pos=start*bits_per_value%8;
|
||||
return _unpack_double(a,val,plen,buf,pos,nvals);
|
||||
}
|
||||
|
||||
static int unpack_double(grib_accessor* a, double* val, size_t *len) {
|
||||
unsigned char* buf = (unsigned char*)a->parent->h->buffer->data;
|
||||
size_t nvals = grib_value_count(a);
|
||||
long pos=0;
|
||||
return _unpack_double(a,val,len,buf,pos,nvals);
|
||||
unsigned char* buf = (unsigned char*)a->parent->h->buffer->data;
|
||||
size_t nvals = grib_value_count(a);
|
||||
long pos=0;
|
||||
return _unpack_double(a,val,len,buf,pos,nvals);
|
||||
}
|
||||
|
||||
#if GRIB_IBMPOWER67_OPT
|
||||
#define restrict
|
||||
#include "minmax_val.c"
|
||||
#undef restrict
|
||||
#endif
|
||||
|
||||
static int pack_double(grib_accessor* a, const double* val, size_t *len)
|
||||
{
|
||||
grib_accessor_data_simple_packing* self = (grib_accessor_data_simple_packing*)a;
|
||||
grib_accessor_data_simple_packing* self = (grib_accessor_data_simple_packing*)a;
|
||||
|
||||
size_t i = 0;
|
||||
size_t n_vals = *len;
|
||||
int err = 0;
|
||||
int last;
|
||||
double reference_value = 0;
|
||||
long binary_scale_factor = 0;
|
||||
long bits_per_value = 0;
|
||||
long decimal_scale_factor = 0;
|
||||
long decimal_scale_factor_get = 0;
|
||||
double decimal = 1;
|
||||
double max = 0;
|
||||
double min = 0;
|
||||
double unscaled_max = 0;
|
||||
double unscaled_min = 0;
|
||||
double f=0;
|
||||
double range=0;
|
||||
double minrange=0,maxrange=0;
|
||||
long changing_precision=0;
|
||||
grib_context* c=a->parent->h->context;
|
||||
size_t i = 0;
|
||||
size_t n_vals = *len;
|
||||
int err = 0;
|
||||
int last;
|
||||
double reference_value = 0;
|
||||
long binary_scale_factor = 0;
|
||||
long bits_per_value = 0;
|
||||
long decimal_scale_factor = 0;
|
||||
long decimal_scale_factor_get = 0;
|
||||
double decimal = 1;
|
||||
double max = 0;
|
||||
double min = 0;
|
||||
double unscaled_max = 0;
|
||||
double unscaled_min = 0;
|
||||
double f=0;
|
||||
double range=0;
|
||||
double minrange=0,maxrange=0;
|
||||
long changing_precision=0;
|
||||
grib_context* c=a->parent->h->context;
|
||||
|
||||
decimal_scale_factor=0;
|
||||
decimal_scale_factor=0;
|
||||
|
||||
if (*len == 0) {
|
||||
return GRIB_NO_VALUES;
|
||||
}
|
||||
if (*len == 0) {
|
||||
return GRIB_NO_VALUES;
|
||||
}
|
||||
|
||||
if((err = grib_get_long_internal(a->parent->h,self->bits_per_value,&bits_per_value)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
if((err = grib_get_long_internal(a->parent->h,self->bits_per_value,&bits_per_value)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
if(*len == 0) return GRIB_SUCCESS;
|
||||
if(*len == 0) return GRIB_SUCCESS;
|
||||
|
||||
if((err = grib_get_long_internal(a->parent->h,self->decimal_scale_factor, &decimal_scale_factor_get))
|
||||
!= GRIB_SUCCESS)
|
||||
return err;
|
||||
/*/
|
||||
* check we don't encode bpv > max(ulong)-1 as it is
|
||||
* not currently supported by the algorithm
|
||||
*/
|
||||
if ( bits_per_value > (sizeof(long)*8-1) ) {
|
||||
return GRIB_INVALID_BPV;
|
||||
}
|
||||
if((err = grib_get_long_internal(a->parent->h,self->decimal_scale_factor, &decimal_scale_factor_get))
|
||||
!= GRIB_SUCCESS)
|
||||
return err;
|
||||
/*/
|
||||
* check we don't encode bpv > max(ulong)-1 as it is
|
||||
* not currently supported by the algorithm
|
||||
*/
|
||||
if ( bits_per_value > (sizeof(long)*8-1) ) {
|
||||
return GRIB_INVALID_BPV;
|
||||
}
|
||||
|
||||
self->dirty=1;
|
||||
self->dirty=1;
|
||||
|
||||
max = val[0];
|
||||
min = max;
|
||||
for(i=1;i< n_vals;i++) {
|
||||
if (val[i] > max ) max = val[i];
|
||||
if (val[i] < min ) min = val[i];
|
||||
}
|
||||
max = val[0];
|
||||
min = max;
|
||||
#if GRIB_IBMPOWER67_OPT
|
||||
minmax_val(val+1, n_vals-1, &min, &max);
|
||||
#else
|
||||
for(i=1;i< n_vals;i++) {
|
||||
if (val[i] > max ) max = val[i];
|
||||
if (val[i] < min ) min = val[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
/* constant field only reference_value is set and bits_per_value=0 */
|
||||
if(max==min) {
|
||||
if (grib_get_nearest_smaller_value(a->parent->h,self->reference_value,val[0],&reference_value)
|
||||
!=GRIB_SUCCESS) {
|
||||
grib_context_log(a->parent->h->context,GRIB_LOG_ERROR,
|
||||
"unable to find nearest_smaller_value of %g for %s",min,self->reference_value);
|
||||
exit(GRIB_INTERNAL_ERROR);
|
||||
}
|
||||
if((err = grib_set_double_internal(a->parent->h,self->reference_value, reference_value)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
/* constant field only reference_value is set and bits_per_value=0 */
|
||||
if(max==min) {
|
||||
if (grib_get_nearest_smaller_value(a->parent->h,self->reference_value,val[0],&reference_value)
|
||||
!=GRIB_SUCCESS) {
|
||||
grib_context_log(a->parent->h->context,GRIB_LOG_ERROR,
|
||||
"unable to find nearest_smaller_value of %g for %s",min,self->reference_value);
|
||||
exit(GRIB_INTERNAL_ERROR);
|
||||
}
|
||||
if((err = grib_set_double_internal(a->parent->h,self->reference_value, reference_value)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
{
|
||||
/* Make sure we can decode it again */
|
||||
double ref = 1e-100;
|
||||
grib_get_double_internal(a->parent->h,self->reference_value,&ref);
|
||||
if (ref != reference_value)
|
||||
printf("%.20e != %.20e",ref,reference_value);
|
||||
Assert(ref == reference_value);
|
||||
}
|
||||
{
|
||||
/* Make sure we can decode it again */
|
||||
double ref = 1e-100;
|
||||
grib_get_double_internal(a->parent->h,self->reference_value,&ref);
|
||||
if (ref != reference_value)
|
||||
printf("%.20e != %.20e",ref,reference_value);
|
||||
Assert(ref == reference_value);
|
||||
}
|
||||
|
||||
if (c->large_constant_fields || (c->gribex_mode_on==1 && self->edition==1)) {
|
||||
if((err = grib_set_long_internal(a->parent->h,self->binary_scale_factor, 0)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
if (c->large_constant_fields || (c->gribex_mode_on==1 && self->edition==1)) {
|
||||
if((err = grib_set_long_internal(a->parent->h,self->binary_scale_factor, 0)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
if((err = grib_set_long_internal(a->parent->h,self->decimal_scale_factor, 0)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
if((err = grib_set_long_internal(a->parent->h,self->decimal_scale_factor, 0)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
if (bits_per_value==0) {
|
||||
if((err = grib_set_long_internal(a->parent->h,self->bits_per_value, 16)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
}
|
||||
if (bits_per_value==0) {
|
||||
if((err = grib_set_long_internal(a->parent->h,self->bits_per_value, 16)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
}
|
||||
|
||||
return GRIB_SUCCESS;
|
||||
} else {
|
||||
bits_per_value=0;
|
||||
if((err = grib_set_long_internal(a->parent->h,self->bits_per_value, bits_per_value)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
return GRIB_SUCCESS;
|
||||
} else {
|
||||
bits_per_value=0;
|
||||
if((err = grib_set_long_internal(a->parent->h,self->bits_per_value, bits_per_value)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
return GRIB_CONSTANT_FIELD;
|
||||
}
|
||||
}
|
||||
return GRIB_CONSTANT_FIELD;
|
||||
}
|
||||
}
|
||||
|
||||
if((err = grib_get_long_internal(a->parent->h,self->binary_scale_factor, &binary_scale_factor))
|
||||
!= GRIB_SUCCESS)
|
||||
return err;
|
||||
if((err = grib_get_long_internal(a->parent->h,self->binary_scale_factor, &binary_scale_factor))
|
||||
!= GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
if((err = grib_get_long_internal(a->parent->h,self->changing_precision, &changing_precision))
|
||||
!= GRIB_SUCCESS)
|
||||
return err;
|
||||
if((err = grib_get_long_internal(a->parent->h,self->changing_precision, &changing_precision))
|
||||
!= GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
/* the packing parameters are not properly defined
|
||||
/* the packing parameters are not properly defined
|
||||
this is a safe way of fixing the problem */
|
||||
if ( changing_precision==0 && bits_per_value==0 && decimal_scale_factor_get==0) {
|
||||
if ( changing_precision==0 && bits_per_value==0 && decimal_scale_factor_get==0) {
|
||||
|
||||
grib_context_log(a->parent->h->context,GRIB_LOG_WARNING,
|
||||
"%s==0 and %s==0 (setting %s=24)",
|
||||
self->bits_per_value,
|
||||
self->decimal_scale_factor,
|
||||
self->bits_per_value);
|
||||
|
||||
bits_per_value=24;
|
||||
if((err = grib_set_long_internal(a->parent->h,self->bits_per_value,
|
||||
bits_per_value))!= GRIB_SUCCESS)
|
||||
return err;
|
||||
}
|
||||
grib_context_log(a->parent->h->context,GRIB_LOG_WARNING,
|
||||
"%s==0 and %s==0 (setting %s=24)",
|
||||
self->bits_per_value,
|
||||
self->decimal_scale_factor,
|
||||
self->bits_per_value);
|
||||
|
||||
if ( bits_per_value == 0 || (binary_scale_factor==0 && decimal_scale_factor_get!=0) ) {
|
||||
/* decimal_scale_factor is given, binary_scale_factor=0
|
||||
bits_per_value=24;
|
||||
if((err = grib_set_long_internal(a->parent->h,self->bits_per_value,
|
||||
bits_per_value))!= GRIB_SUCCESS)
|
||||
return err;
|
||||
}
|
||||
|
||||
if ( bits_per_value == 0 || (binary_scale_factor==0 && decimal_scale_factor_get!=0) ) {
|
||||
/* decimal_scale_factor is given, binary_scale_factor=0
|
||||
and bits_per_value is computed */
|
||||
binary_scale_factor=0;
|
||||
decimal_scale_factor=decimal_scale_factor_get;
|
||||
decimal = grib_power(decimal_scale_factor,10) ;
|
||||
min*=decimal;
|
||||
max*=decimal;
|
||||
binary_scale_factor=0;
|
||||
decimal_scale_factor=decimal_scale_factor_get;
|
||||
decimal = grib_power(decimal_scale_factor,10) ;
|
||||
min*=decimal;
|
||||
max*=decimal;
|
||||
|
||||
/* bits_per_value=(long)ceil(log((double)(imax-imin+1))/log(2.0)); */
|
||||
bits_per_value=number_of_bits((unsigned long)fabs(max-min));
|
||||
/*printf("bits_per_value=%ld\n",bits_per_value);*/
|
||||
if((err = grib_set_long_internal(a->parent->h,self->bits_per_value, bits_per_value)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
if (grib_get_nearest_smaller_value(a->parent->h,self->reference_value,min,&reference_value)
|
||||
!=GRIB_SUCCESS) {
|
||||
grib_context_log(a->parent->h->context,GRIB_LOG_ERROR,
|
||||
"unable to find nearest_smaller_value of %g for %s",min,self->reference_value);
|
||||
exit(GRIB_INTERNAL_ERROR);
|
||||
}
|
||||
/* divisor=1; */
|
||||
} else {
|
||||
last=127;
|
||||
if (c->gribex_mode_on && self->edition==1) last=99;
|
||||
/* bits_per_value is given and decimal_scale_factor
|
||||
and binary_scale_factor are calcualated
|
||||
*/
|
||||
if (max == min) {
|
||||
binary_scale_factor=0;
|
||||
/* divisor=1; */
|
||||
if (grib_get_nearest_smaller_value(a->parent->h,self->reference_value,min,&reference_value)
|
||||
!=GRIB_SUCCESS) {
|
||||
grib_context_log(a->parent->h->context,GRIB_LOG_ERROR,
|
||||
"unable to find nearest_smaller_value of %g for %s",min,self->reference_value);
|
||||
exit(GRIB_INTERNAL_ERROR);
|
||||
}
|
||||
/* bits_per_value=(long)ceil(log((double)(imax-imin+1))/log(2.0)); */
|
||||
bits_per_value=number_of_bits((unsigned long)fabs(max-min));
|
||||
/*printf("bits_per_value=%ld\n",bits_per_value);*/
|
||||
if((err = grib_set_long_internal(a->parent->h,self->bits_per_value, bits_per_value)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
if (grib_get_nearest_smaller_value(a->parent->h,self->reference_value,min,&reference_value)
|
||||
!=GRIB_SUCCESS) {
|
||||
grib_context_log(a->parent->h->context,GRIB_LOG_ERROR,
|
||||
"unable to find nearest_smaller_value of %g for %s",min,self->reference_value);
|
||||
exit(GRIB_INTERNAL_ERROR);
|
||||
}
|
||||
/* divisor=1; */
|
||||
} else {
|
||||
/* printf("max=%g reference_value=%g grib_power(-last,2)=%g decimal_scale_factor=%ld bits_per_value=%ld\n",
|
||||
last=127;
|
||||
if (c->gribex_mode_on && self->edition==1) last=99;
|
||||
/* bits_per_value is given and decimal_scale_factor
|
||||
and binary_scale_factor are calcualated
|
||||
*/
|
||||
if (max == min) {
|
||||
binary_scale_factor=0;
|
||||
/* divisor=1; */
|
||||
if (grib_get_nearest_smaller_value(a->parent->h,self->reference_value,min,&reference_value)
|
||||
!=GRIB_SUCCESS) {
|
||||
grib_context_log(a->parent->h->context,GRIB_LOG_ERROR,
|
||||
"unable to find nearest_smaller_value of %g for %s",min,self->reference_value);
|
||||
exit(GRIB_INTERNAL_ERROR);
|
||||
}
|
||||
} else {
|
||||
/* printf("max=%g reference_value=%g grib_power(-last,2)=%g decimal_scale_factor=%ld bits_per_value=%ld\n",
|
||||
max,reference_value,grib_power(-last,2),decimal_scale_factor,bits_per_value);*/
|
||||
/* last must be a parameter coming from the def file*/
|
||||
range=(max-min);
|
||||
unscaled_min=min;
|
||||
unscaled_max=max;
|
||||
f=(grib_power(bits_per_value,2)-1);
|
||||
minrange=grib_power(-last,2)*f;
|
||||
maxrange=grib_power(last,2)*f;
|
||||
/* last must be a parameter coming from the def file*/
|
||||
range=(max-min);
|
||||
unscaled_min=min;
|
||||
unscaled_max=max;
|
||||
f=(grib_power(bits_per_value,2)-1);
|
||||
minrange=grib_power(-last,2)*f;
|
||||
maxrange=grib_power(last,2)*f;
|
||||
|
||||
while (range<minrange) {
|
||||
decimal_scale_factor+=1;
|
||||
decimal*=10;
|
||||
min=unscaled_min*decimal;
|
||||
max=unscaled_max*decimal;
|
||||
range=(max-min);
|
||||
}
|
||||
while (range>maxrange) {
|
||||
decimal_scale_factor-=1;
|
||||
decimal/=10;
|
||||
min=unscaled_min*decimal;
|
||||
max=unscaled_max*decimal;
|
||||
range=(max-min);
|
||||
}
|
||||
while (range<minrange) {
|
||||
decimal_scale_factor+=1;
|
||||
decimal*=10;
|
||||
min=unscaled_min*decimal;
|
||||
max=unscaled_max*decimal;
|
||||
range=(max-min);
|
||||
}
|
||||
while (range>maxrange) {
|
||||
decimal_scale_factor-=1;
|
||||
decimal/=10;
|
||||
min=unscaled_min*decimal;
|
||||
max=unscaled_max*decimal;
|
||||
range=(max-min);
|
||||
}
|
||||
|
||||
if (grib_get_nearest_smaller_value(a->parent->h,self->reference_value,
|
||||
min,&reference_value)!=GRIB_SUCCESS) {
|
||||
grib_context_log(a->parent->h->context,GRIB_LOG_ERROR,
|
||||
"unable to find nearest_smaller_value of %g for %s",min,self->reference_value);
|
||||
exit(GRIB_INTERNAL_ERROR);
|
||||
}
|
||||
if (grib_get_nearest_smaller_value(a->parent->h,self->reference_value,
|
||||
min,&reference_value)!=GRIB_SUCCESS) {
|
||||
grib_context_log(a->parent->h->context,GRIB_LOG_ERROR,
|
||||
"unable to find nearest_smaller_value of %g for %s",min,self->reference_value);
|
||||
exit(GRIB_INTERNAL_ERROR);
|
||||
}
|
||||
|
||||
binary_scale_factor = grib_get_binary_scale_fact(max,reference_value,bits_per_value,&err);
|
||||
binary_scale_factor = grib_get_binary_scale_fact(max,reference_value,bits_per_value,&err);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if((err = grib_set_double_internal(a->parent->h,self->reference_value, reference_value)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
if((err = grib_set_double_internal(a->parent->h,self->reference_value, reference_value)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
if((err = grib_set_long_internal(a->parent->h,self->changing_precision, 0)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
if((err = grib_set_long_internal(a->parent->h,self->binary_scale_factor, binary_scale_factor)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
if((err = grib_set_long_internal(a->parent->h,self->decimal_scale_factor, decimal_scale_factor))
|
||||
!= GRIB_SUCCESS)
|
||||
return err;
|
||||
if((err = grib_set_long_internal(a->parent->h,self->changing_precision, 0)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
if((err = grib_set_long_internal(a->parent->h,self->binary_scale_factor, binary_scale_factor)) !=
|
||||
GRIB_SUCCESS)
|
||||
return err;
|
||||
if((err = grib_set_long_internal(a->parent->h,self->decimal_scale_factor, decimal_scale_factor))
|
||||
!= GRIB_SUCCESS)
|
||||
return err;
|
||||
|
||||
|
||||
return GRIB_SUCCESS;
|
||||
|
||||
return GRIB_SUCCESS;
|
||||
}
|
||||
|
||||
|
|
145
src/grib_bits.c
145
src/grib_bits.c
|
@ -36,129 +36,133 @@ static int max_nbits = sizeof(unsigned long)*8;
|
|||
|
||||
unsigned long grib_decode_unsigned_byte_long(const unsigned char* p, long o, int l)
|
||||
{
|
||||
long accum = 0;
|
||||
int i = 0;
|
||||
unsigned char b = p[o++];
|
||||
long accum = 0;
|
||||
int i = 0;
|
||||
unsigned char b = p[o++];
|
||||
|
||||
Assert(l <= max_nbits);
|
||||
Assert(l <= max_nbits);
|
||||
|
||||
accum <<= 8;
|
||||
accum |= b ;
|
||||
|
||||
for ( i=1; i<l; i++ )
|
||||
{
|
||||
b = p[o++];
|
||||
accum <<= 8;
|
||||
accum <<= 8;
|
||||
accum |= b ;
|
||||
}
|
||||
return accum;
|
||||
|
||||
for ( i=1; i<l; i++ )
|
||||
{
|
||||
b = p[o++];
|
||||
accum <<= 8;
|
||||
accum |= b ;
|
||||
}
|
||||
return accum;
|
||||
}
|
||||
|
||||
long grib_decode_signed_long(const unsigned char* p, long o, int l)
|
||||
{
|
||||
long accum = 0;
|
||||
int i = 0;
|
||||
unsigned char b = p[o++];
|
||||
int sign = grib_get_bit(&b, 0);
|
||||
long accum = 0;
|
||||
int i = 0;
|
||||
unsigned char b = p[o++];
|
||||
int sign = grib_get_bit(&b, 0);
|
||||
|
||||
Assert(l <= max_nbits);
|
||||
Assert(l <= max_nbits);
|
||||
|
||||
b &= 0x7f;
|
||||
accum <<= 8;
|
||||
accum |= b ;
|
||||
|
||||
for ( i=1; i<l; i++ )
|
||||
{
|
||||
b = p[o++];
|
||||
accum <<= 8;
|
||||
b &= 0x7f;
|
||||
accum <<= 8;
|
||||
accum |= b ;
|
||||
}
|
||||
if (sign == 0)
|
||||
return accum;
|
||||
else
|
||||
return -accum;
|
||||
|
||||
for ( i=1; i<l; i++ )
|
||||
{
|
||||
b = p[o++];
|
||||
accum <<= 8;
|
||||
accum |= b ;
|
||||
}
|
||||
if (sign == 0)
|
||||
return accum;
|
||||
else
|
||||
return -accum;
|
||||
}
|
||||
|
||||
int grib_encode_signed_long(unsigned char* p, long val , long o, int l)
|
||||
{
|
||||
unsigned short accum = 0;
|
||||
int i = 0;
|
||||
int off = o;
|
||||
int sign = (val<0);
|
||||
unsigned short accum = 0;
|
||||
int i = 0;
|
||||
int off = o;
|
||||
int sign = (val<0);
|
||||
|
||||
Assert(l <= max_nbits);
|
||||
Assert(l <= max_nbits);
|
||||
|
||||
if (sign) val *= -1;
|
||||
if (sign) val *= -1;
|
||||
|
||||
for ( i=0; i<l; i++ )
|
||||
{
|
||||
accum = val >> (l*8-(8*(i+1)));
|
||||
p[o++] = accum ;
|
||||
}
|
||||
for ( i=0; i<l; i++ )
|
||||
{
|
||||
accum = val >> (l*8-(8*(i+1)));
|
||||
p[o++] = accum ;
|
||||
}
|
||||
|
||||
if (sign) p[off] |= 128;
|
||||
if (sign) p[off] |= 128;
|
||||
|
||||
return GRIB_SUCCESS;
|
||||
return GRIB_SUCCESS;
|
||||
}
|
||||
|
||||
static void grib_set_bit_on( unsigned char* p, long* bitp){
|
||||
|
||||
p += *bitp/8;
|
||||
*p |= (1u << (7-((*bitp)%8)));
|
||||
(*bitp)++;
|
||||
p += *bitp/8;
|
||||
*p |= (1u << (7-((*bitp)%8)));
|
||||
(*bitp)++;
|
||||
}
|
||||
|
||||
static void grib_set_bit_off( unsigned char* p, long* bitp){
|
||||
|
||||
p += *bitp/8;
|
||||
*p &= ~(1u << (7-((*bitp)%8)));
|
||||
(*bitp)++;
|
||||
p += *bitp/8;
|
||||
*p &= ~(1u << (7-((*bitp)%8)));
|
||||
(*bitp)++;
|
||||
}
|
||||
|
||||
int grib_get_bit(const unsigned char* p, long bitp){
|
||||
p += (bitp >> 3);
|
||||
return (*p&(1<<(7-(bitp%8))));
|
||||
p += (bitp >> 3);
|
||||
return (*p&(1<<(7-(bitp%8))));
|
||||
}
|
||||
|
||||
void grib_set_bit( unsigned char* p, long bitp, int val){
|
||||
|
||||
if(val == 0) grib_set_bit_off(p,&bitp);
|
||||
else grib_set_bit_on(p,&bitp);
|
||||
if(val == 0) grib_set_bit_off(p,&bitp);
|
||||
else grib_set_bit_on(p,&bitp);
|
||||
}
|
||||
|
||||
long grib_decode_signed_longb(const unsigned char* p, long *bitp, long nbits)
|
||||
{
|
||||
int sign = grib_get_bit(p, *bitp);
|
||||
long val = 0;
|
||||
int sign = grib_get_bit(p, *bitp);
|
||||
long val = 0;
|
||||
|
||||
Assert(nbits <= max_nbits);
|
||||
Assert(nbits <= max_nbits);
|
||||
|
||||
*bitp += 1;
|
||||
*bitp += 1;
|
||||
|
||||
val = grib_decode_unsigned_long( p, bitp, nbits-1);
|
||||
val = grib_decode_unsigned_long( p, bitp, nbits-1);
|
||||
|
||||
if (sign)val = -val;
|
||||
if (sign)val = -val;
|
||||
|
||||
|
||||
return val;
|
||||
return val;
|
||||
}
|
||||
|
||||
int grib_encode_signed_longb(unsigned char* p, long val ,long *bitp, long nb)
|
||||
{
|
||||
short sign = val < 0;
|
||||
short sign = val < 0;
|
||||
|
||||
Assert(nb <= max_nbits);
|
||||
Assert(nb <= max_nbits);
|
||||
|
||||
if(sign) val = -val;
|
||||
if(sign) val = -val;
|
||||
|
||||
if(sign)
|
||||
grib_set_bit_on (p, bitp);
|
||||
else
|
||||
grib_set_bit_off(p, bitp);
|
||||
if(sign)
|
||||
grib_set_bit_on (p, bitp);
|
||||
else
|
||||
grib_set_bit_off(p, bitp);
|
||||
|
||||
return grib_encode_unsigned_longb(p, val ,bitp, nb-1);
|
||||
return grib_encode_unsigned_longb(p, val ,bitp, nb-1);
|
||||
}
|
||||
|
||||
#if GRIB_IBMPOWER67_OPT
|
||||
|
||||
#include "grib_bits_ibmpow.c"
|
||||
|
||||
#else
|
||||
#if FAST_BIG_ENDIAN
|
||||
|
||||
#include "grib_bits_fast_big_endian.c"
|
||||
|
@ -169,3 +173,4 @@ int grib_encode_signed_longb(unsigned char* p, long val ,long *bitp, long nb)
|
|||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,148 @@
|
|||
/*
|
||||
* Copyright 2005-2013 ECMWF.
|
||||
*
|
||||
* This software is licensed under the terms of the Apache Licence Version 2.0
|
||||
* which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
||||
*
|
||||
* In applying this licence, ECMWF does not waive the privileges and immunities granted to it by
|
||||
* virtue of its status as an intergovernmental organisation nor does it submit to any jurisdiction.
|
||||
*/
|
||||
|
||||
|
||||
unsigned long grib_decode_unsigned_long(const unsigned char* p, long *bitp, long nbits)
|
||||
{
|
||||
int i;
|
||||
long ret = 0;
|
||||
long o = *bitp/8;
|
||||
int l = nbits/8;
|
||||
|
||||
if (nbits==0) return 0;
|
||||
|
||||
if(nbits > max_nbits)
|
||||
{
|
||||
int bits = nbits;
|
||||
int mod = bits % max_nbits;
|
||||
|
||||
if(mod != 0)
|
||||
{
|
||||
int e=grib_decode_unsigned_long(p,bitp,mod);
|
||||
Assert( e == 0);
|
||||
bits -= mod;
|
||||
}
|
||||
|
||||
while(bits > max_nbits)
|
||||
{
|
||||
int e=grib_decode_unsigned_long(p,bitp,max_nbits);
|
||||
Assert( e == 0);
|
||||
bits -= max_nbits;
|
||||
}
|
||||
|
||||
return grib_decode_unsigned_long(p,bitp,bits);
|
||||
}
|
||||
|
||||
if((nbits%8 > 0)||(*bitp%8 > 0)) {
|
||||
for(i=0; i< nbits;i++){
|
||||
ret <<= 1;
|
||||
if(grib_get_bit( p, *bitp)) ret += 1;
|
||||
*bitp += 1;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret <<= 8;
|
||||
ret |= p[o++] ;
|
||||
|
||||
for ( i=1; i<l; i++ )
|
||||
{
|
||||
ret <<= 8;
|
||||
ret |= p[o++] ;
|
||||
}
|
||||
*bitp += nbits;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int grib_encode_unsigned_long(unsigned char* p, unsigned long val ,long *bitp, long nbits)
|
||||
{
|
||||
long len = nbits;
|
||||
int s = *bitp%8;
|
||||
int n = 8-s;
|
||||
unsigned char tmp = 0; /*for temporary results*/
|
||||
|
||||
if(nbits > max_nbits)
|
||||
{
|
||||
/* TODO: Do some real code here, to support long longs */
|
||||
int bits = nbits;
|
||||
int mod = bits % max_nbits;
|
||||
long zero = 0;
|
||||
|
||||
/* printf("Warning: encoding %ld bits=%ld %ld\n",val,nbits,*bitp); */
|
||||
|
||||
if(mod != 0)
|
||||
{
|
||||
int e=grib_encode_unsigned_long(p,zero,bitp,mod);
|
||||
/* printf(" -> : encoding %ld bits=%ld %ld\n",zero,(long)mod,*bitp); */
|
||||
Assert( e == 0);
|
||||
bits -= mod;
|
||||
}
|
||||
|
||||
while(bits > max_nbits)
|
||||
{
|
||||
int e=grib_encode_unsigned_long(p,zero,bitp,max_nbits);
|
||||
/* printf(" -> : encoding %ld bits=%ld %ld\n",zero,(long)max_nbits,*bitp); */
|
||||
Assert(e == 0);
|
||||
bits -= max_nbits;
|
||||
}
|
||||
|
||||
/* printf(" -> : encoding %ld bits=%ld %ld\n",val,(long)bits,*bitp); */
|
||||
return grib_encode_unsigned_long(p,val,bitp,bits);
|
||||
}
|
||||
|
||||
if(s)
|
||||
p += (*bitp >> 3); /* skip the bytes */
|
||||
else
|
||||
p += (*bitp >> 3); /* skip the bytes */
|
||||
|
||||
/* head */
|
||||
if(s) {
|
||||
len -= n;
|
||||
if (len < 0) {
|
||||
tmp = ((val << -len) | ((*p) & dmasks[n]));
|
||||
} else {
|
||||
tmp = ((val >> len) | ((*p) & dmasks[n]));
|
||||
}
|
||||
*p = tmp;
|
||||
(*p)++;
|
||||
}
|
||||
|
||||
/* write the middle words */
|
||||
while(len >= 8)
|
||||
{
|
||||
len -= 8;
|
||||
*p++ = (val >> len);
|
||||
}
|
||||
|
||||
/* write the end bits */
|
||||
if(len) *p = (val << (8-len));
|
||||
|
||||
*bitp += nbits;
|
||||
return GRIB_SUCCESS;
|
||||
}
|
||||
|
||||
int grib_encode_unsigned_longb(unsigned char* p, unsigned long val ,long *bitp, long nb)
|
||||
{
|
||||
long i = 0;
|
||||
|
||||
Assert(nb <= max_nbits);
|
||||
|
||||
for(i=nb-1; i >= 0; i--){
|
||||
if(test(val,i))
|
||||
grib_set_bit_on (p, bitp);
|
||||
else
|
||||
grib_set_bit_off(p, bitp);
|
||||
}
|
||||
return GRIB_SUCCESS;
|
||||
}
|
||||
|
||||
#include "grib_bits_ibmpow_opt.c"
|
||||
|
|
@ -0,0 +1,298 @@
|
|||
/*
|
||||
* Copyright 2005-2013 ECMWF.
|
||||
*
|
||||
* This software is licensed under the terms of the Apache Licence Version 2.0
|
||||
* which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
||||
*
|
||||
* In applying this licence, ECMWF does not waive the privileges and immunities granted to it by
|
||||
* virtue of its status as an intergovernmental organisation nor does it submit to any jurisdiction.
|
||||
*/
|
||||
|
||||
int grib_decode_long_array(const unsigned char* p, long *bitp, long bitsPerValue,
|
||||
size_t n_vals,long* val) {
|
||||
long i=0;
|
||||
unsigned long lvalue = 0;
|
||||
|
||||
if(bitsPerValue%8)
|
||||
{
|
||||
int j=0;
|
||||
for(i=0;i < n_vals;i++) {
|
||||
lvalue=0;
|
||||
for(j=0; j< bitsPerValue;j++){
|
||||
lvalue <<= 1;
|
||||
if(grib_get_bit( p, *bitp)) lvalue += 1;
|
||||
*bitp += 1;
|
||||
}
|
||||
val[i] = lvalue;
|
||||
}
|
||||
} else {
|
||||
int bc;
|
||||
int l = bitsPerValue/8;
|
||||
size_t o = *bitp/8;
|
||||
|
||||
for(i=0;i < n_vals;i++)
|
||||
{
|
||||
lvalue = 0;
|
||||
lvalue <<= 8;
|
||||
lvalue |= p[o++] ;
|
||||
|
||||
for ( bc=1; bc<l; bc++ )
|
||||
{
|
||||
lvalue <<= 8;
|
||||
lvalue |= p[o++] ;
|
||||
}
|
||||
val[i] = lvalue;
|
||||
}
|
||||
*bitp+=bitsPerValue*n_vals;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int grib_decode_double_array(const unsigned char* p, long *bitp, long bitsPerValue,
|
||||
double reference_value,double s,double d,
|
||||
size_t n_vals,double* val) {
|
||||
long i=0;
|
||||
unsigned long lvalue = 0;
|
||||
double x;
|
||||
|
||||
if(bitsPerValue%8)
|
||||
{
|
||||
int j=0;
|
||||
for(i=0;i < n_vals;i++) {
|
||||
lvalue=0;
|
||||
for(j=0; j< bitsPerValue;j++){
|
||||
lvalue <<= 1;
|
||||
if(grib_get_bit( p, *bitp)) lvalue += 1;
|
||||
*bitp += 1;
|
||||
}
|
||||
x=((lvalue*s)+reference_value)*d;
|
||||
val[i] = (double)x;
|
||||
}
|
||||
} else {
|
||||
int bc;
|
||||
int l = bitsPerValue/8;
|
||||
size_t o = 0;
|
||||
|
||||
for(i=0;i < n_vals;i++)
|
||||
{
|
||||
lvalue = 0;
|
||||
lvalue <<= 8;
|
||||
lvalue |= p[o++] ;
|
||||
|
||||
for ( bc=1; bc<l; bc++ )
|
||||
{
|
||||
lvalue <<= 8;
|
||||
lvalue |= p[o++] ;
|
||||
}
|
||||
x=((lvalue*s)+reference_value)*d;
|
||||
val[i] = (double)x;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* code from grib_bits_fast_big_endian_simple.c */
|
||||
int grib_decode_double_array_complex(const unsigned char* p, long *bitp, long nbits,double reference_value,double s,double* d,size_t size,double* val) {
|
||||
long i=0;
|
||||
long countOfLeftmostBits=0,leftmostBits=0;
|
||||
long startBit;
|
||||
long remainingBits = nbits;
|
||||
long *pp=(long*)p;
|
||||
int inited=0;
|
||||
unsigned long uval=0;
|
||||
|
||||
startBit=*bitp;
|
||||
remainingBits = nbits;
|
||||
if (startBit >= max_nbits) {
|
||||
pp+=startBit/max_nbits;
|
||||
startBit %= max_nbits;
|
||||
}
|
||||
|
||||
if ( (max_nbits%nbits == 0) && (*bitp%nbits == 0) ) {
|
||||
for (i=0;i<size;i++) {
|
||||
if (startBit == max_nbits) {
|
||||
startBit = 0;
|
||||
pp++;
|
||||
}
|
||||
|
||||
val[i]=VALUE(*pp,startBit,remainingBits);
|
||||
val[i]= ((( (val[i]) * s)+reference_value)*d[i/2]);
|
||||
startBit+=remainingBits;
|
||||
remainingBits=nbits;
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
for (i=0;i<size;i++) {
|
||||
countOfLeftmostBits = startBit + remainingBits;
|
||||
if (countOfLeftmostBits > max_nbits) {
|
||||
countOfLeftmostBits = max_nbits - startBit;
|
||||
remainingBits -= countOfLeftmostBits;
|
||||
leftmostBits=(VALUE(*pp,startBit,countOfLeftmostBits)) << remainingBits;
|
||||
startBit = 0;
|
||||
pp++;
|
||||
} else
|
||||
leftmostBits = 0;
|
||||
|
||||
val[i]=leftmostBits+(VALUE(*pp,startBit,remainingBits));
|
||||
val[i]= ((( (val[i]) * s)+reference_value)*d[i/2]);
|
||||
startBit+=remainingBits;
|
||||
remainingBits=nbits;
|
||||
}
|
||||
}
|
||||
|
||||
*bitp+=size*nbits;
|
||||
|
||||
return GRIB_SUCCESS;
|
||||
}
|
||||
|
||||
int grib_encode_long_array(size_t n_vals,const long* val,long bits_per_value,unsigned char* p,long *off)
|
||||
{
|
||||
size_t i=0;
|
||||
unsigned long unsigned_val=0;
|
||||
unsigned char *encoded=p;
|
||||
if(bits_per_value%8){
|
||||
for(i=0;i< n_vals;i++){
|
||||
unsigned_val=val[i];
|
||||
grib_encode_unsigned_longb(encoded, unsigned_val, off , bits_per_value);
|
||||
}
|
||||
} else{
|
||||
for(i=0;i< n_vals;i++){
|
||||
int blen=0;
|
||||
blen = bits_per_value;
|
||||
unsigned_val = val[i];
|
||||
while(blen >= 8)
|
||||
{
|
||||
blen -= 8;
|
||||
*encoded = (unsigned_val >> blen);
|
||||
encoded++;
|
||||
*off+=8;
|
||||
}
|
||||
}
|
||||
}
|
||||
return GRIB_SUCCESS;
|
||||
}
|
||||
|
||||
#define U_BYTEORDER static union {unsigned long l; unsigned char c[sizeof(long)];} u_byteorder = {1}
|
||||
#define IS_BIGENDIAN() (u_byteorder.c[sizeof(long) - 1])
|
||||
#define GRIBPACK unsigned char
|
||||
#define Error(x, y) fprintf(stderr, x, y)
|
||||
#define restrict
|
||||
|
||||
#include "encode_double_array.c"
|
||||
|
||||
#undef U_BYTEORDER
|
||||
#undef IS_BIGENDIAN
|
||||
#undef GRIBPACK
|
||||
#undef Error
|
||||
#undef restrict
|
||||
|
||||
int grib_encode_double_array(size_t n_vals,const double* val,long bits_per_value,double reference_value,double d,double divisor,unsigned char* p,long *off)
|
||||
{
|
||||
if ( fabs(d - 1) > 0 ) /* d != 1 */
|
||||
{
|
||||
size_t i=0;
|
||||
unsigned long unsigned_val=0;
|
||||
unsigned char *encoded=p;
|
||||
double x;
|
||||
|
||||
if(bits_per_value%8){
|
||||
for(i=0;i< n_vals;i++){
|
||||
x=(((val[i]*d)-reference_value)*divisor)+0.5;
|
||||
unsigned_val = (unsigned long)x;
|
||||
grib_encode_unsigned_longb(encoded, unsigned_val, off , bits_per_value);
|
||||
}
|
||||
} else{
|
||||
for(i=0;i< n_vals;i++){
|
||||
int blen=0;
|
||||
blen = bits_per_value;
|
||||
x = ((((val[i]*d)-reference_value)*divisor)+0.5);
|
||||
unsigned_val = (unsigned long)x;
|
||||
while(blen >= 8)
|
||||
{
|
||||
blen -= 8;
|
||||
*encoded = (unsigned_val >> blen);
|
||||
encoded++;
|
||||
*off+=8;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
#if defined (_ARCH_PWR6)
|
||||
encode_double_array_unrolled(bits_per_value, 0, n_vals, p, val, reference_value, divisor, off);
|
||||
#else
|
||||
encode_double_array_byte (bits_per_value, 0, n_vals, p, val, reference_value, divisor, off);
|
||||
#endif
|
||||
}
|
||||
|
||||
return GRIB_SUCCESS;
|
||||
}
|
||||
|
||||
/* code from grib_bits_fast_big_endian_simple.c */
|
||||
int grib_encode_double_array_complex(size_t n_vals,double* val,long nbits,double reference_value,
|
||||
double* scal,double d,double divisor,unsigned char* p,long *bitp) {
|
||||
long* destination = (long*)p;
|
||||
double* v=val;
|
||||
long countOfLeftmostBits=0,startBit=0,remainingBits=0,rightmostBits=0;
|
||||
unsigned long uval=0;
|
||||
size_t i=0;
|
||||
|
||||
startBit=*bitp;
|
||||
remainingBits = nbits;
|
||||
|
||||
if (startBit >= max_nbits) {
|
||||
destination += startBit / max_nbits;
|
||||
startBit %= max_nbits;
|
||||
}
|
||||
|
||||
if ( (max_nbits%nbits == 0) && (*bitp%nbits == 0) ) {
|
||||
|
||||
for(i=0;i< n_vals;i++) {
|
||||
uval = (unsigned long)(((((*v)*d*scal[i/2])-reference_value)*divisor)+0.5);
|
||||
if (startBit == max_nbits) {
|
||||
startBit = 0;
|
||||
destination++;
|
||||
}
|
||||
|
||||
rightmostBits = VALUE(uval,max_nbits-remainingBits,remainingBits);
|
||||
*destination = ((*destination) & ~MASKVALUE(startBit,remainingBits))
|
||||
+ (rightmostBits << max_nbits-(remainingBits+startBit));
|
||||
startBit+=remainingBits;
|
||||
remainingBits=nbits;
|
||||
|
||||
v++;
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
for(i=0;i< n_vals;i++) {
|
||||
countOfLeftmostBits = startBit + remainingBits;
|
||||
uval = (unsigned long)(((((*v)*d*scal[i/2])-reference_value)*divisor)+0.5);
|
||||
if (countOfLeftmostBits > max_nbits) {
|
||||
countOfLeftmostBits = max_nbits - startBit;
|
||||
startBit = max_nbits - remainingBits;
|
||||
remainingBits -= countOfLeftmostBits;
|
||||
*destination = (((*destination) >> countOfLeftmostBits) << countOfLeftmostBits)
|
||||
+ (VALUE(uval,startBit,countOfLeftmostBits));
|
||||
startBit = 0;
|
||||
destination++;
|
||||
}
|
||||
|
||||
rightmostBits = VALUE(uval,max_nbits-remainingBits,remainingBits);
|
||||
*destination = ((*destination) & ~MASKVALUE(startBit,remainingBits))
|
||||
+ (rightmostBits << max_nbits-(remainingBits+startBit));
|
||||
startBit+=remainingBits;
|
||||
remainingBits=nbits;
|
||||
|
||||
v++;
|
||||
}
|
||||
}
|
||||
|
||||
*bitp+=n_vals*nbits;
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,332 @@
|
|||
/*
|
||||
* Copyright 2005-2013 ECMWF.
|
||||
*
|
||||
* This software is licensed under the terms of the Apache Licence Version 2.0
|
||||
* which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
||||
*
|
||||
* In applying this licence, ECMWF does not waive the privileges and immunities granted to it by
|
||||
* virtue of its status as an intergovernmental organisation nor does it submit to any jurisdiction.
|
||||
*/
|
||||
|
||||
|
||||
#ifdef __PGI
|
||||
#undef __AVX__
|
||||
#undef __SSE2__
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _GET_IBM_COUNTER
|
||||
#include <libhpc.h>
|
||||
#endif
|
||||
|
||||
#ifdef __AVX__
|
||||
#include <float.h>
|
||||
#include <stdint.h>
|
||||
#include <inttypes.h>
|
||||
#include <immintrin.h>
|
||||
#ifdef _GET_X86_COUNTER
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
#else
|
||||
#ifdef __SSE2__
|
||||
#include <float.h>
|
||||
#include <stdint.h>
|
||||
#include <inttypes.h>
|
||||
#include <emmintrin.h>
|
||||
#ifdef _GET_X86_COUNTER
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __AVX__
|
||||
static
|
||||
void avx_minmax_val(const double *restrict buf, long nframes, double *min, double *max)
|
||||
{
|
||||
__m256d current_max, current_min, work;
|
||||
|
||||
/* load max and min values into all four slots of the XMM registers */
|
||||
current_min = _mm256_set1_pd(*min);
|
||||
current_max = _mm256_set1_pd(*max);
|
||||
|
||||
/* Work input until "buf" reaches 32 byte alignment */
|
||||
while ( ((unsigned long)buf) % 32 != 0 && nframes > 0) {
|
||||
|
||||
/* Load the next double into the work buffer */
|
||||
work = _mm256_set1_pd(*buf);
|
||||
current_min = _mm256_min_pd(current_min, work);
|
||||
current_max = _mm256_max_pd(current_max, work);
|
||||
buf++;
|
||||
nframes--;
|
||||
}
|
||||
|
||||
while (nframes >= 16) {
|
||||
/* use 64 byte prefetch? */
|
||||
__builtin_prefetch(buf+64,0,0); /* for GCC 4.3.2+ */
|
||||
|
||||
work = _mm256_load_pd(buf);
|
||||
current_min = _mm256_min_pd(current_min, work);
|
||||
current_max = _mm256_max_pd(current_max, work);
|
||||
buf += 4;
|
||||
__builtin_prefetch(buf+64,0,0); /* for GCC 4.3.2+ */
|
||||
work = _mm256_load_pd(buf);
|
||||
current_min = _mm256_min_pd(current_min, work);
|
||||
current_max = _mm256_max_pd(current_max, work);
|
||||
buf += 4;
|
||||
__builtin_prefetch(buf+64,0,0); /* for GCC 4.3.2+ */
|
||||
work = _mm256_load_pd(buf);
|
||||
current_min = _mm256_min_pd(current_min, work);
|
||||
current_max = _mm256_max_pd(current_max, work);
|
||||
buf += 4;
|
||||
__builtin_prefetch(buf+64,0,0); /* for GCC 4.3.2+ */
|
||||
work = _mm256_load_pd(buf);
|
||||
current_min = _mm256_min_pd(current_min, work);
|
||||
current_max = _mm256_max_pd(current_max, work);
|
||||
buf += 4;
|
||||
nframes -= 16;
|
||||
}
|
||||
|
||||
/* work through aligned buffers */
|
||||
while (nframes >= 4) {
|
||||
work = _mm256_load_pd(buf);
|
||||
current_min = _mm256_min_pd(current_min, work);
|
||||
current_max = _mm256_max_pd(current_max, work);
|
||||
buf += 4;
|
||||
nframes -= 4;
|
||||
}
|
||||
|
||||
/* work through the remainung values */
|
||||
while ( nframes > 0) {
|
||||
work = _mm256_set1_pd(*buf);
|
||||
current_min = _mm256_min_pd(current_min, work);
|
||||
current_max = _mm256_max_pd(current_max, work);
|
||||
buf++;
|
||||
nframes--;
|
||||
}
|
||||
|
||||
/* find min & max value through shuffle tricks */
|
||||
|
||||
work = current_min;
|
||||
work = _mm256_shuffle_pd(work, work, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
work = _mm256_min_pd (work, current_min);
|
||||
current_min = work;
|
||||
work = _mm256_shuffle_pd(work, work, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
work = _mm256_min_pd (work, current_min);
|
||||
_mm256_store_pd(min, work);
|
||||
work = current_max;
|
||||
work = _mm256_shuffle_pd(work, work, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
work = _mm256_max_pd (work, current_max);
|
||||
current_max = work;
|
||||
work = _mm256_shuffle_pd(work, work, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
work = _mm256_max_pd (work, current_max);
|
||||
_mm256_store_pd(max, work);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#ifdef __SSE2__
|
||||
static
|
||||
void sse2_minmax_val(const double *restrict buf, long nframes, double *min, double *max)
|
||||
{
|
||||
__m128d current_max, current_min, work;
|
||||
|
||||
/* load starting max and min values into all slots of the XMM registers */
|
||||
current_min = _mm_set1_pd(*min);
|
||||
current_max = _mm_set1_pd(*max);
|
||||
|
||||
/* work on input until buf reaches 16 byte alignment */
|
||||
while ( ((unsigned long)buf) % 16 != 0 && nframes > 0) {
|
||||
|
||||
/* load one double and replicate */
|
||||
work = _mm_set1_pd(*buf);
|
||||
current_min = _mm_min_pd(current_min, work);
|
||||
current_max = _mm_max_pd(current_max, work);
|
||||
buf++;
|
||||
nframes--;
|
||||
}
|
||||
|
||||
while (nframes >= 8) {
|
||||
/* use 64 byte prefetch for double octetts */
|
||||
__builtin_prefetch(buf+64,0,0); /* for GCC 4.3.2 + */
|
||||
|
||||
work = _mm_load_pd(buf);
|
||||
current_min = _mm_min_pd(current_min, work);
|
||||
current_max = _mm_max_pd(current_max, work);
|
||||
buf += 2;
|
||||
work = _mm_load_pd(buf);
|
||||
current_min = _mm_min_pd(current_min, work);
|
||||
current_max = _mm_max_pd(current_max, work);
|
||||
buf += 2;
|
||||
work = _mm_load_pd(buf);
|
||||
current_min = _mm_min_pd(current_min, work);
|
||||
current_max = _mm_max_pd(current_max, work);
|
||||
buf += 2;
|
||||
work = _mm_load_pd(buf);
|
||||
current_min = _mm_min_pd(current_min, work);
|
||||
current_max = _mm_max_pd(current_max, work);
|
||||
buf += 2;
|
||||
nframes -= 8;
|
||||
}
|
||||
|
||||
/* work through smaller chunks of aligned buffers without prefetching */
|
||||
while (nframes >= 2) {
|
||||
work = _mm_load_pd(buf);
|
||||
current_min = _mm_min_pd(current_min, work);
|
||||
current_max = _mm_max_pd(current_max, work);
|
||||
buf += 2;
|
||||
nframes -= 2;
|
||||
}
|
||||
|
||||
/* work through the remaining value */
|
||||
while ( nframes > 0) {
|
||||
/* load the last double and replicate */
|
||||
work = _mm_set1_pd(*buf);
|
||||
current_min = _mm_min_pd(current_min, work);
|
||||
current_max = _mm_max_pd(current_max, work);
|
||||
buf++;
|
||||
nframes--;
|
||||
}
|
||||
|
||||
/* find final min and max value through shuffle tricks */
|
||||
work = current_min;
|
||||
work = _mm_shuffle_pd(work, work, _MM_SHUFFLE2(0, 1));
|
||||
work = _mm_min_pd (work, current_min);
|
||||
_mm_store_sd(min, work);
|
||||
work = current_max;
|
||||
work = _mm_shuffle_pd(work, work, _MM_SHUFFLE2(0, 1));
|
||||
work = _mm_max_pd (work, current_max);
|
||||
_mm_store_sd(max, work);
|
||||
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
static
|
||||
void minmax_val(const double *restrict data, long datasize, double *fmin, double *fmax)
|
||||
{
|
||||
#ifdef _GET_X86_COUNTER
|
||||
uint64_t start_minmax, end_minmax;
|
||||
#endif
|
||||
|
||||
if ( datasize < 1 ) return;
|
||||
|
||||
#ifdef _GET_X86_COUNTER
|
||||
start_minmax = _rdtsc();
|
||||
#endif
|
||||
|
||||
#ifdef __AVX__
|
||||
|
||||
avx_minmax_val(data, datasize, fmin, fmax);
|
||||
|
||||
#else
|
||||
|
||||
|
||||
#ifdef __SSE2__
|
||||
|
||||
sse2_minmax_val(data, datasize, fmin, fmax);
|
||||
|
||||
#else
|
||||
|
||||
#ifdef _ARCH_PWR6
|
||||
#define __UNROLL_DEPTH_1 6
|
||||
|
||||
/* to allow pipelining we have to unroll */
|
||||
|
||||
#ifdef _GET_IBM_COUNTER
|
||||
hpmStart(1, "minmax fsel");
|
||||
#endif
|
||||
{
|
||||
long i, j;
|
||||
long residual = datasize % __UNROLL_DEPTH_1;
|
||||
long ofs = datasize - residual;
|
||||
double register dmin[__UNROLL_DEPTH_1];
|
||||
double register dmax[__UNROLL_DEPTH_1];
|
||||
|
||||
for ( j = 0; j < __UNROLL_DEPTH_1; j++)
|
||||
{
|
||||
dmin[j] = data[0];
|
||||
dmax[j] = data[0];
|
||||
}
|
||||
|
||||
for ( i = 0; i < datasize - residual; i += __UNROLL_DEPTH_1 )
|
||||
{
|
||||
for (j = 0; j < __UNROLL_DEPTH_1; j++)
|
||||
{
|
||||
dmin[j] = __fsel(dmin[j] - data[i+j], data[i+j], dmin[j]);
|
||||
dmax[j] = __fsel(data[i+j] - dmax[j], data[i+j], dmax[j]);
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < residual; j++)
|
||||
{
|
||||
dmin[j] = __fsel(dmin[j] - data[ofs+j], data[ofs+j], dmin[j]);
|
||||
dmax[j] = __fsel(data[ofs+j] - dmax[j], data[ofs+j], dmax[j]);
|
||||
}
|
||||
|
||||
for ( j = 0; j < __UNROLL_DEPTH_1; j++)
|
||||
{
|
||||
*fmin = __fsel(*fmin - dmin[j], dmin[j], *fmin);
|
||||
*fmax = __fsel(dmax[j] - *fmax, dmax[j], *fmax);
|
||||
}
|
||||
}
|
||||
#ifdef _GET_IBM_COUNTER
|
||||
hpmStop(1);
|
||||
#endif
|
||||
|
||||
#undef __UNROLL_DEPTH_1
|
||||
#else
|
||||
|
||||
#ifdef _GET_IBM_COUNTER
|
||||
hpmStart(1, "minmax base");
|
||||
#endif
|
||||
{
|
||||
long i;
|
||||
|
||||
#if defined (CRAY)
|
||||
#pragma _CRI ivdep
|
||||
#elif defined (SX)
|
||||
#pragma vdir nodep
|
||||
#elif defined (__uxp__)
|
||||
#pragma loop novrec
|
||||
#endif
|
||||
for ( i = 0; i < datasize; ++i )
|
||||
{
|
||||
if ( *fmin > data[i] ) *fmin = data[i];
|
||||
if ( *fmax < data[i] ) *fmax = data[i];
|
||||
/*
|
||||
*fmin = *fmin < data[i] ? *fmin : data[i];
|
||||
*fmax = *fmax > data[i] ? *fmax : data[i];
|
||||
*/
|
||||
}
|
||||
}
|
||||
#ifdef _GET_IBM_COUNTER
|
||||
hpmStop(1);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef _GET_X86_COUNTER
|
||||
end_minmax = _rdtsc();
|
||||
#ifdef __AVX__
|
||||
printf("AVX cycles:: %" PRIu64 "\n",
|
||||
end_minmax-start_minmax);
|
||||
#else
|
||||
#ifdef __SSE2__
|
||||
printf("SSE2 cycles:: %" PRIu64 "\n",
|
||||
end_minmax-start_minmax);
|
||||
#else
|
||||
printf("loop cycles:: %" PRIu64 "\n",
|
||||
end_minmax-start_minmax);
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
Loading…
Reference in New Issue