Commits

Nathanael Schaeffer  committed e77b1d8

Simplifications in OpenMP codelets.

  • Participants
  • Parent commits af66bc6

Comments (0)

Files changed (2)

File SHT/omp_SH_to_spat.gen.c

 V		BtF += mstep*NLAT;	BpF += mstep*NLAT;
 	#endif
 	}
+
+	#if _GCC_VEC_
+	while(im <= NPHI-imlim) {	// padding for high m's
+		k=0;
+		do {
+Q			BrF[k] = vdup(0.0);
+V			BtF[k] = vdup(0.0);		BpF[k] = vdup(0.0);
+		} while (++k < NLAT_2);
+Q		BrF += mstep*NLAT_2;
+V		BtF += mstep*NLAT_2;	BpF += mstep*NLAT_2;
+	  im+=mstep;
+	}
+	#else
+	while(im <= NPHI/2) {	// padding for high m's
+		k=0;
+		do {
+Q			BrF[k] = 0.0;
+V			BtF[k] = 0.0;	BpF[k] = 0.0;
+		} while (++k < NLAT);
+Q		BrF += mstep*NLAT;
+V		BtF += mstep*NLAT;	BpF += mstep*NLAT;
+	  im+=mstep;
+	}
+	#endif
   #endif
 }
 
 S		GEN3(_sy1s,NWAY,SUFFIX)(shtns, Slm, BtF, BpF, llim, imlim);
 T		GEN3(_sy1t,NWAY,SUFFIX)(shtns, Tlm, BtF, BpF, llim, imlim);
 	#endif
-
-  #ifndef SHT_AXISYM
-    if (NPHI >= 2*imlim)	// padding for high m's
-    #pragma omp single nowait
-    {
-		k=0;
-	  #if _GCC_VEC_
-		do {
-Q			BrF[k +NLAT_2*imlim] = vdup(0.0);
-V			BtF[k +NLAT_2*imlim] = vdup(0.0);	BpF[k +NLAT_2*imlim] = vdup(0.0);
-		} while ( ++k < NLAT_2*(NPHI+1-2*imlim) );
-	  #else
-		do {
-Q			BrF[k +NLAT*imlim] = 0.0;
-V			BtF[k +NLAT*imlim] = 0.0;	BpF[k +NLAT*imlim] = 0.0;
-		} while (++k < NLAT*((NPHI>>1) -imlim+1) );
-	  #endif
-	}
-  #endif
   }
 
   #ifndef SHT_AXISYM

File SHT/omp_spat_to_SH.gen.c

 		}
 		#ifdef SHT_VAR_LTR
 			for (l=llim+1; l<= LMAX; ++l) {
-Q				Qlm[l] = 0.0;
-V				Slm[l] = 0.0;		Tlm[l] = 0.0;
+Q				((v2d*)Qlm)[l] = vdup(0.0);
+V				((v2d*)Slm)[l] = vdup(0.0);		((v2d*)Tlm)[l] = vdup(0.0);
 			}
+			#ifndef SHT_AXISYM
+			if (imlim <= MMAX) {		// zero out m >= imlim
+				l = LiM(shtns, imlim*MRES, imlim);
+				do {
+Q					((v2d*)Qlm)[l] = vdup(0.0);
+V					((v2d*)Slm)[l] = vdup(0.0);		((v2d*)Tlm)[l] = vdup(0.0);
+				} while(++l < shtns->nlm);
+			}
+			#endif
 		#endif
 		m0=mstep;
 	}
 QX	GEN3(_an1,NWAY,SUFFIX)(shtns, BrF, Qlm, llim, imlim);
 VX	GEN3(_an2,NWAY,SUFFIX)(shtns, BtF, BpF, Slm, Tlm, llim, imlim);
 3	GEN3(_an3,NWAY,SUFFIX)(shtns, BrF, BtF, BpF, Qlm, Slm, Tlm, llim, imlim);
-
-  #ifndef SHT_AXISYM
-  	#ifdef SHT_VAR_LTR
-	if (imlim <= MMAX)
-	#pragma omp single nowait
-	{
-		long int l = LiM(shtns, imlim*MRES, imlim);
-		do {
-Q			((v2d*)Qlm)[l] = vdup(0.0);
-V			((v2d*)Slm)[l] = vdup(0.0);		((v2d*)Tlm)[l] = vdup(0.0);
-		} while(++l < shtns->nlm);
-	}
-	#endif
-  #endif
   }
 
   #ifndef SHT_AXISYM