OgreImageResampler.h

Go to the documentation of this file.
00001 /*
00002 -----------------------------------------------------------------------------
00003 This source file is part of OGRE
00004     (Object-oriented Graphics Rendering Engine)
00005 For the latest info, see http://www.ogre3d.org/
00006 
00007 Copyright (c) 2000-2006 Torus Knot Software Ltd
00008 Also see acknowledgements in Readme.html
00009 
00010 This program is free software; you can redistribute it and/or modify it under
00011 the terms of the GNU Lesser General Public License as published by the Free Software
00012 Foundation; either version 2 of the License, or (at your option) any later
00013 version.
00014 
00015 This program is distributed in the hope that it will be useful, but WITHOUT
00016 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00017 FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
00018 
00019 You should have received a copy of the GNU Lesser General Public License along with
00020 this program; if not, write to the Free Software Foundation, Inc., 59 Temple
00021 Place - Suite 330, Boston, MA 02111-1307, USA, or go to
00022 http://www.gnu.org/copyleft/lesser.txt.
00023 
00024 You may alternatively use this source under the terms of a specific version of
00025 the OGRE Unrestricted License provided you have obtained such a license from
00026 Torus Knot Software Ltd.
00027 -----------------------------------------------------------------------------
00028 */
00029 #ifndef OGREIMAGERESAMPLER_H
00030 #define OGREIMAGERESAMPLER_H
00031 
00032 #include <algorithm>
00033 
00034 // this file is inlined into OgreImage.cpp!
00035 // do not include anywhere else.
00036 namespace Ogre {
00037 
00038 // define uint64 type
00039 #if OGRE_COMPILER == OGRE_COMPILER_MSVC
00040 typedef unsigned __int64 uint64;
00041 #else
00042 typedef unsigned long long uint64;
00043 #endif
00044 
00045 // variable name hints:
00046 // sx_48 = 16/48-bit fixed-point x-position in source
00047 // stepx = difference between adjacent sx_48 values
00048 // sx1 = lower-bound integer x-position in source
00049 // sx2 = upper-bound integer x-position in source
00050 // sxf = fractional weight between sx1 and sx2
00051 // x,y,z = location of output pixel in destination
00052 
00053 // nearest-neighbor resampler, does not convert formats.
00054 // templated on bytes-per-pixel to allow compiler optimizations, such
00055 // as simplifying memcpy() and replacing multiplies with bitshifts
00056 template<unsigned int elemsize> struct NearestResampler {
00057     static void scale(const PixelBox& src, const PixelBox& dst) {
00058         // assert(src.format == dst.format);
00059 
00060         // srcdata stays at beginning, pdst is a moving pointer
00061         uchar* srcdata = (uchar*)src.data;
00062         uchar* pdst = (uchar*)dst.data;
00063 
00064         // sx_48,sy_48,sz_48 represent current position in source
00065         // using 16/48-bit fixed precision, incremented by steps
00066         uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth();
00067         uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight();
00068         uint64 stepz = ((uint64)src.getDepth() << 48) / dst.getDepth();
00069 
00070         // note: ((stepz>>1) - 1) is an extra half-step increment to adjust
00071         // for the center of the destination pixel, not the top-left corner
00072         uint64 sz_48 = (stepz >> 1) - 1;
00073         for (size_t z = dst.front; z < dst.back; z++, sz_48 += stepz) {
00074             size_t srczoff = (size_t)(sz_48 >> 48) * src.slicePitch;
00075             
00076             uint64 sy_48 = (stepy >> 1) - 1;
00077             for (size_t y = dst.top; y < dst.bottom; y++, sy_48 += stepy) {
00078                 size_t srcyoff = (size_t)(sy_48 >> 48) * src.rowPitch;
00079             
00080                 uint64 sx_48 = (stepx >> 1) - 1;
00081                 for (size_t x = dst.left; x < dst.right; x++, sx_48 += stepx) {
00082                     uchar* psrc = srcdata +
00083                         elemsize*((size_t)(sx_48 >> 48) + srcyoff + srczoff);
00084                     memcpy(pdst, psrc, elemsize);
00085                     pdst += elemsize;
00086                 }
00087                 pdst += elemsize*dst.getRowSkip();
00088             }
00089             pdst += elemsize*dst.getSliceSkip();
00090         }
00091     }
00092 };
00093 
00094 
00095 // default floating-point linear resampler, does format conversion
00096 struct LinearResampler {
00097     static void scale(const PixelBox& src, const PixelBox& dst) {
00098         size_t srcelemsize = PixelUtil::getNumElemBytes(src.format);
00099         size_t dstelemsize = PixelUtil::getNumElemBytes(dst.format);
00100 
00101         // srcdata stays at beginning, pdst is a moving pointer
00102         uchar* srcdata = (uchar*)src.data;
00103         uchar* pdst = (uchar*)dst.data;
00104         
00105         // sx_48,sy_48,sz_48 represent current position in source
00106         // using 16/48-bit fixed precision, incremented by steps
00107         uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth();
00108         uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight();
00109         uint64 stepz = ((uint64)src.getDepth() << 48) / dst.getDepth();
00110         
00111         // temp is 16/16 bit fixed precision, used to adjust a source
00112         // coordinate (x, y, or z) backwards by half a pixel so that the
00113         // integer bits represent the first sample (eg, sx1) and the
00114         // fractional bits are the blend weight of the second sample
00115         unsigned int temp;
00116 
00117         // note: ((stepz>>1) - 1) is an extra half-step increment to adjust
00118         // for the center of the destination pixel, not the top-left corner
00119         uint64 sz_48 = (stepz >> 1) - 1;
00120         for (size_t z = dst.front; z < dst.back; z++, sz_48+=stepz) {
00121             temp = sz_48 >> 32;
00122             temp = (temp > 0x8000)? temp - 0x8000 : 0;
00123             size_t sz1 = temp >> 16;                 // src z, sample #1
00124             size_t sz2 = std::min(sz1+1,src.getDepth()-1);// src z, sample #2
00125             float szf = (temp & 0xFFFF) / 65536.f; // weight of sample #2
00126 
00127             uint64 sy_48 = (stepy >> 1) - 1;
00128             for (size_t y = dst.top; y < dst.bottom; y++, sy_48+=stepy) {
00129                 temp = sy_48 >> 32;
00130                 temp = (temp > 0x8000)? temp - 0x8000 : 0;
00131                 size_t sy1 = temp >> 16;                    // src y #1
00132                 size_t sy2 = std::min(sy1+1,src.getHeight()-1);// src y #2
00133                 float syf = (temp & 0xFFFF) / 65536.f; // weight of #2
00134                 
00135                 uint64 sx_48 = (stepx >> 1) - 1;
00136                 for (size_t x = dst.left; x < dst.right; x++, sx_48+=stepx) {
00137                     temp = sx_48 >> 32;
00138                     temp = (temp > 0x8000)? temp - 0x8000 : 0;
00139                     size_t sx1 = temp >> 16;                    // src x #1
00140                     size_t sx2 = std::min(sx1+1,src.getWidth()-1);// src x #2
00141                     float sxf = (temp & 0xFFFF) / 65536.f; // weight of #2
00142                 
00143                     ColourValue x1y1z1, x2y1z1, x1y2z1, x2y2z1;
00144                     ColourValue x1y1z2, x2y1z2, x1y2z2, x2y2z2;
00145 
00146 #define UNPACK(dst,x,y,z) PixelUtil::unpackColour(&dst, src.format, \
00147     srcdata + srcelemsize*((x)+(y)*src.rowPitch+(z)*src.slicePitch))
00148 
00149                     UNPACK(x1y1z1,sx1,sy1,sz1); UNPACK(x2y1z1,sx2,sy1,sz1);
00150                     UNPACK(x1y2z1,sx1,sy2,sz1); UNPACK(x2y2z1,sx2,sy2,sz1);
00151                     UNPACK(x1y1z2,sx1,sy1,sz2); UNPACK(x2y1z2,sx2,sy1,sz2);
00152                     UNPACK(x1y2z2,sx1,sy2,sz2); UNPACK(x2y2z2,sx2,sy2,sz2);
00153 #undef UNPACK
00154 
00155                     ColourValue accum =
00156                         x1y1z1 * ((1.0f - sxf)*(1.0f - syf)*(1.0f - szf)) +
00157                         x2y1z1 * (        sxf *(1.0f - syf)*(1.0f - szf)) +
00158                         x1y2z1 * ((1.0f - sxf)*        syf *(1.0f - szf)) +
00159                         x2y2z1 * (        sxf *        syf *(1.0f - szf)) +
00160                         x1y1z2 * ((1.0f - sxf)*(1.0f - syf)*        szf ) +
00161                         x2y1z2 * (        sxf *(1.0f - syf)*        szf ) +
00162                         x1y2z2 * ((1.0f - sxf)*        syf *        szf ) +
00163                         x2y2z2 * (        sxf *        syf *        szf );
00164 
00165                     PixelUtil::packColour(accum, dst.format, pdst);
00166 
00167                     pdst += dstelemsize;
00168                 }
00169                 pdst += dstelemsize*dst.getRowSkip();
00170             }
00171             pdst += dstelemsize*dst.getSliceSkip();
00172         }
00173     }
00174 };
00175 
00176 
00177 // float32 linear resampler, converts FLOAT32_RGB/FLOAT32_RGBA only.
00178 // avoids overhead of pixel unpack/repack function calls
00179 struct LinearResampler_Float32 {
00180     static void scale(const PixelBox& src, const PixelBox& dst) {
00181         size_t srcchannels = PixelUtil::getNumElemBytes(src.format) / sizeof(float);
00182         size_t dstchannels = PixelUtil::getNumElemBytes(dst.format) / sizeof(float);
00183         // assert(srcchannels == 3 || srcchannels == 4);
00184         // assert(dstchannels == 3 || dstchannels == 4);
00185 
00186         // srcdata stays at beginning, pdst is a moving pointer
00187         float* srcdata = (float*)src.data;
00188         float* pdst = (float*)dst.data;
00189         
00190         // sx_48,sy_48,sz_48 represent current position in source
00191         // using 16/48-bit fixed precision, incremented by steps
00192         uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth();
00193         uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight();
00194         uint64 stepz = ((uint64)src.getDepth() << 48) / dst.getDepth();
00195         
00196         // temp is 16/16 bit fixed precision, used to adjust a source
00197         // coordinate (x, y, or z) backwards by half a pixel so that the
00198         // integer bits represent the first sample (eg, sx1) and the
00199         // fractional bits are the blend weight of the second sample
00200         unsigned int temp;
00201 
00202         // note: ((stepz>>1) - 1) is an extra half-step increment to adjust
00203         // for the center of the destination pixel, not the top-left corner
00204         uint64 sz_48 = (stepz >> 1) - 1;
00205         for (size_t z = dst.front; z < dst.back; z++, sz_48+=stepz) {
00206             temp = sz_48 >> 32;
00207             temp = (temp > 0x8000)? temp - 0x8000 : 0;
00208             size_t sz1 = temp >> 16;                 // src z, sample #1
00209             size_t sz2 = std::min(sz1+1,src.getDepth()-1);// src z, sample #2
00210             float szf = (temp & 0xFFFF) / 65536.f; // weight of sample #2
00211 
00212             uint64 sy_48 = (stepy >> 1) - 1;
00213             for (size_t y = dst.top; y < dst.bottom; y++, sy_48+=stepy) {
00214                 temp = sy_48 >> 32;
00215                 temp = (temp > 0x8000)? temp - 0x8000 : 0;
00216                 size_t sy1 = temp >> 16;                    // src y #1
00217                 size_t sy2 = std::min(sy1+1,src.getHeight()-1);// src y #2
00218                 float syf = (temp & 0xFFFF) / 65536.f; // weight of #2
00219                 
00220                 uint64 sx_48 = (stepx >> 1) - 1;
00221                 for (size_t x = dst.left; x < dst.right; x++, sx_48+=stepx) {
00222                     temp = sx_48 >> 32;
00223                     temp = (temp > 0x8000)? temp - 0x8000 : 0;
00224                     size_t sx1 = temp >> 16;                    // src x #1
00225                     size_t sx2 = std::min(sx1+1,src.getWidth()-1);// src x #2
00226                     float sxf = (temp & 0xFFFF) / 65536.f; // weight of #2
00227                     
00228                     // process R,G,B,A simultaneously for cache coherence?
00229                     float accum[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
00230 
00231 #define ACCUM3(x,y,z,factor) \
00232     { float f = factor; \
00233     size_t off = (x+y*src.rowPitch+z*src.slicePitch)*srcchannels; \
00234     accum[0]+=srcdata[off+0]*f; accum[1]+=srcdata[off+1]*f; \
00235     accum[2]+=srcdata[off+2]*f; }
00236 
00237 #define ACCUM4(x,y,z,factor) \
00238     { float f = factor; \
00239     size_t off = (x+y*src.rowPitch+z*src.slicePitch)*srcchannels; \
00240     accum[0]+=srcdata[off+0]*f; accum[1]+=srcdata[off+1]*f; \
00241     accum[2]+=srcdata[off+2]*f; accum[3]+=srcdata[off+3]*f; }
00242 
00243                     if (srcchannels == 3 || dstchannels == 3) {
00244                         // RGB, no alpha
00245                         ACCUM3(sx1,sy1,sz1,(1.0f-sxf)*(1.0f-syf)*(1.0f-szf));
00246                         ACCUM3(sx2,sy1,sz1,      sxf *(1.0f-syf)*(1.0f-szf));
00247                         ACCUM3(sx1,sy2,sz1,(1.0f-sxf)*      syf *(1.0f-szf));
00248                         ACCUM3(sx2,sy2,sz1,      sxf *      syf *(1.0f-szf));
00249                         ACCUM3(sx1,sy1,sz2,(1.0f-sxf)*(1.0f-syf)*      szf );
00250                         ACCUM3(sx2,sy1,sz2,      sxf *(1.0f-syf)*      szf );
00251                         ACCUM3(sx1,sy2,sz2,(1.0f-sxf)*      syf *      szf );
00252                         ACCUM3(sx2,sy2,sz2,      sxf *      syf *      szf );
00253                         accum[3] = 1.0f;
00254                     } else {
00255                         // RGBA
00256                         ACCUM4(sx1,sy1,sz1,(1.0f-sxf)*(1.0f-syf)*(1.0f-szf));
00257                         ACCUM4(sx2,sy1,sz1,      sxf *(1.0f-syf)*(1.0f-szf));
00258                         ACCUM4(sx1,sy2,sz1,(1.0f-sxf)*      syf *(1.0f-szf));
00259                         ACCUM4(sx2,sy2,sz1,      sxf *      syf *(1.0f-szf));
00260                         ACCUM4(sx1,sy1,sz2,(1.0f-sxf)*(1.0f-syf)*      szf );
00261                         ACCUM4(sx2,sy1,sz2,      sxf *(1.0f-syf)*      szf );
00262                         ACCUM4(sx1,sy2,sz2,(1.0f-sxf)*      syf *      szf );
00263                         ACCUM4(sx2,sy2,sz2,      sxf *      syf *      szf );
00264                     }
00265 
00266                     memcpy(pdst, accum, sizeof(float)*dstchannels);
00267 
00268 #undef ACCUM3
00269 #undef ACCUM4
00270 
00271                     pdst += dstchannels;
00272                 }
00273                 pdst += dstchannels*dst.getRowSkip();
00274             }
00275             pdst += dstchannels*dst.getSliceSkip();
00276         }
00277     }
00278 };
00279 
00280 
00281 
00282 // byte linear resampler, does not do any format conversions.
00283 // only handles pixel formats that use 1 byte per color channel.
00284 // 2D only; punts 3D pixelboxes to default LinearResampler (slow).
00285 // templated on bytes-per-pixel to allow compiler optimizations, such
00286 // as unrolling loops and replacing multiplies with bitshifts
00287 template<unsigned int channels> struct LinearResampler_Byte {
00288     static void scale(const PixelBox& src, const PixelBox& dst) {
00289         // assert(src.format == dst.format);
00290 
00291         // only optimized for 2D
00292         if (src.getDepth() > 1 || dst.getDepth() > 1) {
00293             LinearResampler::scale(src, dst);
00294             return;
00295         }
00296 
00297         // srcdata stays at beginning of slice, pdst is a moving pointer
00298         uchar* srcdata = (uchar*)src.data;
00299         uchar* pdst = (uchar*)dst.data;
00300 
00301         // sx_48,sy_48 represent current position in source
00302         // using 16/48-bit fixed precision, incremented by steps
00303         uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth();
00304         uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight();
00305         
00306         // bottom 28 bits of temp are 16/12 bit fixed precision, used to
00307         // adjust a source coordinate backwards by half a pixel so that the
00308         // integer bits represent the first sample (eg, sx1) and the
00309         // fractional bits are the blend weight of the second sample
00310         unsigned int temp;
00311         
00312         uint64 sy_48 = (stepy >> 1) - 1;
00313         for (size_t y = dst.top; y < dst.bottom; y++, sy_48+=stepy) {
00314             temp = sy_48 >> 36;
00315             temp = (temp > 0x800)? temp - 0x800: 0;
00316             unsigned int syf = temp & 0xFFF;
00317             size_t sy1 = temp >> 12;
00318             size_t sy2 = std::min(sy1+1, src.bottom-src.top-1);
00319             size_t syoff1 = sy1 * src.rowPitch;
00320             size_t syoff2 = sy2 * src.rowPitch;
00321 
00322             uint64 sx_48 = (stepx >> 1) - 1;
00323             for (size_t x = dst.left; x < dst.right; x++, sx_48+=stepx) {
00324                 temp = sx_48 >> 36;
00325                 temp = (temp > 0x800)? temp - 0x800 : 0;
00326                 unsigned int sxf = temp & 0xFFF;
00327                 size_t sx1 = temp >> 12;
00328                 size_t sx2 = std::min(sx1+1, src.right-src.left-1);
00329 
00330                 unsigned int sxfsyf = sxf*syf;
00331                 for (unsigned int k = 0; k < channels; k++) {
00332                     unsigned int accum =
00333                         srcdata[(sx1 + syoff1)*channels+k]*(0x1000000-(sxf<<12)-(syf<<12)+sxfsyf) +
00334                         srcdata[(sx2 + syoff1)*channels+k]*((sxf<<12)-sxfsyf) +
00335                         srcdata[(sx1 + syoff2)*channels+k]*((syf<<12)-sxfsyf) +
00336                         srcdata[(sx2 + syoff2)*channels+k]*sxfsyf;
00337                     // accum is computed using 8/24-bit fixed-point math
00338                     // (maximum is 0xFF000000; rounding will not cause overflow)
00339                     *pdst++ = (accum + 0x800000) >> 24;
00340                 }
00341             }
00342             pdst += channels*dst.getRowSkip();
00343         }
00344     }
00345 };
00346 
00347 }
00348 
00349 #endif

Copyright © 2000-2005 by The OGRE Team
Creative Commons License
This work is licensed under a Creative Commons Attribution-ShareAlike 2.5 License.
Last modified Sat May 10 16:24:59 2008