/*
 * Source: /tomo/pyhst, contents of hst_cpu/cpumain.c at revision 276.
 *
 * To get this branch, use:
 *
 *   bzr branch http://darksoft.org/webbzr/tomo/pyhst
 */
/* ##  The PyHST program   is Copyright (C) 2002-2008 of the  */
/* ##  European Synchrotron Radiation Facility (ESRF). */

/* ##  You may use, distribute and copy the PyMCA XRF Toolkit under the terms of */
/* ##  GNU General Public License version 3, which is displayed below, or (at your */
/* ##  option) any later version. */

#include<string.h>
#include <stdio.h>
#include <stdlib.h>
#include<math.h>
#include<emmintrin.h>

/*
 * FLOAT_TO_INT(in, out): convert the float lvalue `in` to an integer and
 * store the result in `out`.
 *
 * The conversion goes through the SSE scalar instruction behind
 * _mm_cvtss_si32(), which rounds according to the current SSE rounding mode
 * rather than truncating the way a C cast does.  `in` must be an addressable
 * float because its address is handed to _mm_load_ss().
 *
 * The body is wrapped in do { } while (0) so that the macro expands to exactly
 * one statement and can be used safely in unbraced if/else bodies.
 */
#define FLOAT_TO_INT(in,out)  \
     do { (out) = _mm_cvtss_si32(_mm_load_ss(&(in))); } while (0)
  /* Earlier x87 implementation, kept for reference:
          __asm__ __volatile__ ("fistpl %0" : "=m" (out) : "t" (in) : "st") ; */




/* Side length, in pixels, of the square tiles handled by cpu_main() and
   cpu_inner(); the per-tile scratch buffer holds PEZZO*PEZZO floats.
   The unrolled inner loop of cpu_inner() consumes 8 columns per iteration and
   has no remainder loop, so this value should stay a multiple of 8. */
#define PEZZO 64

/* Forward declaration of the per-tile accumulation kernel, which is defined
   after cpu_main() in this file.  It adds projection samples into a
   PEZZO x PEZZO buffer whose origin in the slice is given by
   cpu_offset_x / cpu_offset_y. */
void   cpu_inner(   float *SLICE ,   int num_proj, int  num_bins, float *WORK_perproje , 
		    float axis_position, float *axis_position_s,  float *cos_s, float *sin_s , 
		    float cpu_offset_x,  
		    float cpu_offset_y,
		    int oversampling) ;



/*
 * Fill a num_y x num_x slice by processing it in PEZZO x PEZZO tiles.
 *
 * For every tile a scratch buffer is zeroed, handed to cpu_inner() together
 * with the tile origin added to cpu_offset_x / cpu_offset_y, and the part of
 * the result that lies inside the slice is copied into SLICE.  SLICE is
 * addressed row-major with a row stride of num_x.  Tiles that overhang the
 * right or bottom edge are still computed in full; only their in-range
 * columns and rows are copied.
 *
 * num_proj, num_bins, WORK_perproje, axis_position, axis_position_s, cos_s,
 * sin_s and oversampling are forwarded to cpu_inner() unchanged.
 * minX and maxX are accepted for interface compatibility but are not used
 * (they are only referenced by the disabled culling branch below).
 *
 * Returns 1 on success (an empty slice counts as success) and 0 when the
 * tile scratch buffer cannot be allocated; in that case SLICE is untouched.
 */
int cpu_main(int num_y, int num_x,  float * SLICE, int num_proj, int num_bins, float *WORK_perproje , 
	     float axis_position, float * axis_position_s, float * cos_s, float *sin_s , float cpu_offset_x, float cpu_offset_y,
	     int *minX, int*maxX,
	     int oversampling)
{
  int nx, ny;
  int X1, X2, Y1, Y2;
  int ix, iy, jy;
  float *pezzoSLICE;

  /* Only the disabled culling branch below would read these. */
  (void) minX;
  (void) maxX;

  if (num_x <= 0 || num_y <= 0) {
    return 1;                       /* nothing to reconstruct */
  }

  /* Tiles per direction = ceil(extent / PEZZO), in pure integer arithmetic.
     This never schedules a tile that lies completely outside the slice, so
     no back-projection work is spent on tiles whose result would be dropped. */
  nx = num_x / PEZZO + (num_x % PEZZO != 0);
  ny = num_y / PEZZO + (num_y % PEZZO != 0);

  pezzoSLICE = malloc((size_t) PEZZO * PEZZO * sizeof *pezzoSLICE);
  if (pezzoSLICE == NULL) {
    fprintf(stderr, "cpu_main: cannot allocate the %dx%d tile buffer\n",
            PEZZO, PEZZO);
    return 0;
  }

  for (ix = 0; ix < nx; ix++) {
    X1 = PEZZO * ix;
    X2 = X1 + PEZZO;
    if (X2 > num_x) X2 = num_x;     /* clip the last tile column */

    for (iy = 0; iy < ny; iy++) {
      Y1 = PEZZO * iy;
      Y2 = Y1 + PEZZO;
      if (Y2 > num_y) Y2 = num_y;   /* clip the last tile row */

      /* csa: This branch was disabled in Alessandro's version.  It is kept
         for reference, but minX and maxX are no longer computed elsewhere in
         the program.  If this branch is re-enabled, the minX/maxX computation
         in the reconstruction code has to be re-enabled as well (and a local
         "int ce;" has to be declared). */

      /*
      if (minX&&maxX) {
        int i;

        ce=0;
        for(i=Y1; i<Y2; i++) {
            if( minX[i]<X2 && maxX[i]>X1) {
                ce=1;
                break;
            }
        }

        if(ce==0) continue;
      } else {
        pyhst_warning("minX & maxX are not computed....");
      }
      */

      /* cpu_inner() accumulates, so the scratch tile must start from zero. */
      memset(pezzoSLICE, 0, (size_t) PEZZO * PEZZO * sizeof *pezzoSLICE);
      cpu_inner(pezzoSLICE, num_proj, num_bins, WORK_perproje,
                axis_position, axis_position_s, cos_s, sin_s,
                cpu_offset_x + X1,
                cpu_offset_y + Y1,
                oversampling);

      /* Copy the in-range part of each tile row into the slice.  Offsets are
         computed in size_t so that large slices cannot overflow an int. */
      for (jy = Y1; jy < Y2; jy++) {
        memcpy(SLICE + (size_t) jy * (size_t) num_x + (size_t) X1,
               pezzoSLICE + (size_t) PEZZO * (size_t) (jy - Y1),
               (size_t) (X2 - X1) * sizeof *SLICE);
      }
    }
  }

  free(pezzoSLICE);
  return 1;
}



/*
 * Accumulate projection samples into one PEZZO x PEZZO tile.
 *
 * SLICE is a row-major PEZZO x PEZZO buffer; values are added to it, it is
 * not cleared here.  cpu_offset_x / cpu_offset_y give the slice coordinates
 * of the tile's first pixel.
 *
 * For each processed projection p the sampling position of the tile's first
 * pixel is
 *   oversampling * (axis_position_s[p]
 *                   + (cpu_offset_x - 0.5 - axis_position) * cos_s[p]
 *                   - (cpu_offset_y - 0.5 - axis_position) * sin_s[p] - 0.5)
 * and it moves by oversampling*cos_s[p] per column and by
 * -oversampling*sin_s[p] per row.  Each position is turned into an index with
 * FLOAT_TO_INT and the sample at that index is added to the pixel; there is
 * no interpolation and no range check on the index in this function.
 *
 * Projection data layout as used here: every projection occupies
 * 3*oversampling*num_bins floats of WORK_perproje, and indices are taken
 * relative to a point oversampling*num_bins floats into that stretch.
 * (Presumably the stretches on either side act as padding for positions that
 * fall outside the detector -- confirm against the code filling
 * WORK_perproje.)
 *
 * NOTE(review): the projection loop runs while p < num_proj-1, so the entry
 * with index num_proj-1 is never accumulated.  Confirm that this is intended.
 */
void   cpu_inner(   float *SLICE ,   int num_proj, int  num_bins, float *WORK_perproje , 
		    float axis_position, float *axis_position_s,  float *cos_s, float *sin_s , 
		    float cpu_offset_x,  
		    float cpu_offset_y,
		    int oversampling) {

  /* Index origin inside the current projection's stretch of WORK_perproje. */
  float *samples = WORK_perproje + oversampling*num_bins;
  long   p;

  for (p = 0; p < num_proj-1; p++) {
    float c = cos_s[p];
    float s = sin_s[p];

    /* Position increments for 1..8 columns, derived once per projection. */
    float step1 = (float)( c * (oversampling) );
    float step2 = (float)( step1*2 );
    float step3 = (float)( step1*3 );
    float step4 = (float)( step1*4 );
    float step5 = (float)( step1*5 );
    float step6 = (float)( step1*6 );
    float step7 = (float)( step1*7 );
    float step8 = (float)( step1*8 );

    float axis_corr = axis_position_s[p];

    /* Sampling position of the first pixel of the current tile row. */
    float row_origin =
      ( (float) ( ( (oversampling) * ( axis_corr +
                                       ( cpu_offset_x - 0.5f - axis_position ) * c -
                                       ( cpu_offset_y - 0.5f - axis_position ) * s - 0.5f )
                  )));

    /* Change of that position from one tile row to the next. */
    float row_shift = -oversampling * s;

    float *row = SLICE;
    int    y;

    for (y = 0; y < PEZZO; y++) {
      float  cursor = row_origin;   /* position of the first pixel of the group */
      float *px     = row;          /* first pixel of the group */
      int    x;

      /* Manually unrolled: 8 pixels per iteration, no remainder loop. */
      for (x = 0; x < PEZZO - 7; x += 8) {
        float f0 = cursor;
        float f1 = cursor + step1;
        float f2 = cursor + step2;
        float f3 = cursor + step3;
        float f4 = cursor + step4;
        float f5 = cursor + step5;
        float f6 = cursor + step6;
        float f7 = cursor + step7;
        long  b0, b1, b2, b3, b4, b5, b6, b7;

        FLOAT_TO_INT(f0, b0);
        FLOAT_TO_INT(f1, b1);
        FLOAT_TO_INT(f2, b2);
        FLOAT_TO_INT(f3, b3);
        FLOAT_TO_INT(f4, b4);
        FLOAT_TO_INT(f5, b5);
        FLOAT_TO_INT(f6, b6);
        FLOAT_TO_INT(f7, b7);

        px[0] += samples[b0];
        px[1] += samples[b1];
        px[2] += samples[b2];
        px[3] += samples[b3];
        px[4] += samples[b4];
        px[5] += samples[b5];
        px[6] += samples[b6];
        px[7] += samples[b7];

        cursor = cursor + step8;
        px += 8;
      }

      row_origin = row_origin + row_shift;
      row += PEZZO;
    }

    /* Move on to the next projection's stretch of data. */
    samples += 3*oversampling*num_bins;
  }
}