[Quad] Add functions for conversion between quad and string (#237)

shibatch · web-flow · commit 9a3aecfe6eed · 2019-01-29T13:06:21.000+09:00
This patch adds Sleef_strtoq and Sleef_qtostr, which can be used to convert between a quad value and a string. These functions are not vectorized. The corresponding testers are also added. This patch also adds functions for subtraction. Intel compiler testing is temporarily disabled because of license expiration (#238).
diff --git a/Jenkinsfile b/Jenkinsfile
@@ -43,7 +43,7 @@ pipeline {
 			 '''
             	     }
                 }
-
+/*
                 stage('Intel Compiler') {
                     agent { label 'icc' }
                     steps {
@@ -64,7 +64,7 @@ pipeline {
 		        '''
                     }
                 }
-
+*/
                 stage('FMA4') {
             	     agent { label 'fma4' }
             	     steps {
diff --git a/src/arch/helperavx.h b/src/arch/helperavx.h
@@ -215,9 +215,11 @@ static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return _mm256_min_pd
 #if CONFIG == 1
 static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); }
 static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z); }
+static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(z, vmul_vd_vd_vd(x, y)); }
 #else
 static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_macc_pd(x, y, z); }
 static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_msub_pd(x, y, z); }
+static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_nmacc_pd(x, y, z); }
 static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_macc_pd(x, y, z); }
 static INLINE vdouble vfmapp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_macc_pd(x, y, z); }
 static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_msub_pd(x, y, z); }
diff --git a/src/arch/helperpower_128.h b/src/arch/helperpower_128.h
@@ -211,6 +211,7 @@ static INLINE vdouble vsqrt_vd_vd(vdouble d) { return vec_sqrt(d); }
 #if CONFIG == 1
 static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_madd(x, y, z); }
 static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_msub(x, y, z); }
+static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_nmsub(x, y, z); }
 #else
 static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); }
 static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z); }
diff --git a/src/arch/helperpurec_scalar.h b/src/arch/helperpurec_scalar.h
@@ -205,6 +205,7 @@ static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return x < y ? x : y
 #ifndef ENABLE_FMA_DP
 static INLINE vdouble vmla_vd_vd_vd_vd  (vdouble x, vdouble y, vdouble z) { return x * y + z; }
 static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return x * y - z; }
+static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return -x * y + z; }
 #else
 static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return FMA(x, y, z); }
 static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return FMA(x, y, -z); }
diff --git a/src/arch/helpersse2.h b/src/arch/helpersse2.h
@@ -191,6 +191,7 @@ static INLINE vdouble vabs_vd_vd(vdouble d) { return _mm_andnot_pd(_mm_set1_pd(-
 static INLINE vdouble vneg_vd_vd(vdouble d) { return _mm_xor_pd(_mm_set1_pd(-0.0), d); }
 static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); }
 static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z); }
+static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(z, vmul_vd_vd_vd(x, y)); }
 static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return _mm_max_pd(x, y); }
 static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return _mm_min_pd(x, y); }
 
diff --git a/src/arch/helpersve.h b/src/arch/helpersve.h
@@ -547,6 +547,9 @@ static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y,
                                          vdouble z) { // z = x * y - z
   return svnmsb_f64_x(ptrue, x, y, z);
 }
+static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) {
+  return svmsb_f64_x(ptrue, x, y, z);
+}
 #else
 static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); }
 static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z); }
diff --git a/src/quad-tester/qiutsimd.c b/src/quad-tester/qiutsimd.c
@@ -171,12 +171,45 @@ typedef union {
     }									\
   }
 
+#define func_strtoq(funcStr) {						\
+    while (startsWith(buf, funcStr " ")) {				\
+      sentinel = 0;							\
+      char s[64];							\
+      sscanf(buf, funcStr " %63s", s);					\
+      Sleef_quad1 a0;							\
+      a0 = Sleef_strtoq(s, NULL, 10);					\
+      cnv128 c0;							\
+      c0.q = a0.s[0];							\
+      printf("%" PRIx64 ":%" PRIx64 "\n", c0.h, c0.l);			\
+      fflush(stdout);							\
+      if (fgets(buf, BUFSIZE-1, stdin) == NULL) break;			\
+    }									\
+  }
+
+#define func_qtostr(funcStr) {						\
+    while (startsWith(buf, funcStr " ")) {				\
+      sentinel = 0;							\
+      cnv128 c0;							\
+      sscanf(buf, funcStr " %" PRIx64 ":%" PRIx64, &c0.h, &c0.l);	\
+      Sleef_quad1 a0;							\
+      a0.s[0] = c0.q;							\
+      char s[64];							\
+      Sleef_qtostr(s, 63, a0, 10);					\
+      printf("%s\n", s);						\
+      fflush(stdout);							\
+      if (fgets(buf, BUFSIZE-1, stdin) == NULL) break;			\
+    }									\
+  }
+
 int do_test(int argc, char **argv) {
   xsrand(time(NULL));
 
   {
     int k = 0;
     k += 1;
+#ifdef ENABLE_PUREC_SCALAR
+    k += 2; // Enable string testing
+#endif
     printf("%d\n", k);
     fflush(stdout);
   }
@@ -187,9 +220,13 @@ int do_test(int argc, char **argv) {
 
   while(!feof(stdin) && sentinel < 2) {
     func_q_q_q("addq_u05", xaddq_u05);
+    func_q_q_q("subq_u05", xsubq_u05);
     func_q_q_q("mulq_u05", xmulq_u05);
     func_q_q_q("divq_u05", xdivq_u05);
     func_q_q("sqrtq_u05", xsqrtq_u05);
+    func_q_q("negq", xnegq);
+    func_strtoq("strtoq");
+    func_qtostr("qtostr");
     sentinel++;
   }
 
diff --git a/src/quad-tester/qtester.c b/src/quad-tester/qtester.c
@@ -139,11 +139,34 @@ typedef union {
     return c0.q;							\
   } while(0)
 
+#define child_q_str(funcStr, arg) do {					\
+    char str[256];							\
+    sprintf(str, funcStr " %s\n", arg);					\
+    write(ptoc[1], str, strlen(str));					\
+    if (fgets(str, 255, fpctop) == NULL) stop("child " funcStr);	\
+    cnv128 c;								\
+    sscanf(str, "%" PRIx64 ":%" PRIx64, &c.h, &c.l);			\
+    return c.q;								\
+  } while(0)
+
+#define child_str_q(funcStr, ret, arg) do {				\
+    char str[256];							\
+    cnv128 c;								\
+    c.q = arg;								\
+    sprintf(str, funcStr " %" PRIx64 ":%" PRIx64 "\n", c.h, c.l);	\
+    write(ptoc[1], str, strlen(str));					\
+    if (fgets(str, 255, fpctop) == NULL) stop("child " funcStr);	\
+    sscanf(str, "%63s", ret);						\
+  } while(0)
+
 Sleef_quad child_addq_u05(Sleef_quad x, Sleef_quad y) { child_q_q_q("addq_u05", x, y); }
 Sleef_quad child_subq_u05(Sleef_quad x, Sleef_quad y) { child_q_q_q("subq_u05", x, y); }
 Sleef_quad child_mulq_u05(Sleef_quad x, Sleef_quad y) { child_q_q_q("mulq_u05", x, y); }
 Sleef_quad child_divq_u05(Sleef_quad x, Sleef_quad y) { child_q_q_q("divq_u05", x, y); }
 Sleef_quad child_sqrtq_u05(Sleef_quad x) { child_q_q("sqrtq_u05", x); }
+Sleef_quad child_negq(Sleef_quad x) { child_q_q("negq", x); }
+Sleef_quad child_strtoq(const char *s) { child_q_str("strtoq", s); }
+void child_qtostr(char *ret, Sleef_quad x) { child_str_q("qtostr", ret, x); }
 
 Sleef_quad child_copysignq(Sleef_quad x, Sleef_quad y) { child_q_q_q("copysignq", x, y); }
 Sleef_quad child_fabsq(Sleef_quad x) { child_q_q("fabsq", x); }
@@ -240,7 +263,7 @@ Sleef_quad child_fminq(Sleef_quad x, Sleef_quad y) { child_q_q_q("fminq", x, y);
   } while(0)
 
 #define checkAccuracyOuterLoop_q(mpfrFunc, childFunc, minStr, maxStr, nLoop, bound, seed) do { \
-    xsrand(seed);								\
+    xsrand(seed);							\
     Sleef_quad min = cast_q_str(minStr), max = cast_q_str(maxStr);	\
     for(int i=0;i<nLoop && success;i++) {				\
       Sleef_quad x = rndf128(min, max);					\
@@ -263,7 +286,7 @@ void checkResult(int success, double e) {
 #define STR_QUAD_MAX "1.18973149535723176508575932662800702e+4932"
 #define STR_QUAD_DENORM_MIN "6.475175119438025110924438958227646552e-4966"
 
-void do_test() {
+void do_test(int options) {
   mpfr_set_default_prec(256);
   mpfr_t frx, fry, frz;
   mpfr_inits(frx, fry, frz, NULL);
@@ -274,6 +297,7 @@ void do_test() {
     "0.0", "-0.0", "+0.5", "-0.5", "+1.0", "-1.0", "+1.5", "-1.5", "+2.0", "-2.0", "+2.5", "-2.5",
     "1.234", "-1.234", "+1.234e+100", "-1.234e+100", "+1.234e-100", "-1.234e-100",
     "+1.234e+3000", "-1.234e+3000", "+1.234e-3000", "-1.234e-3000",
+    "3.1415926535897932384626433832795028841971693993751058209749445923078164",
     "+" STR_QUAD_MIN, "-" STR_QUAD_MIN,
     "+" STR_QUAD_DENORM_MIN, "-" STR_QUAD_DENORM_MIN,
     "NaN", "Inf", "-Inf"
@@ -291,6 +315,13 @@ void do_test() {
   checkAccuracyOuterLoop_q_q(mpfr_add, child_addq_u05, "0", "Inf", 5 * NTEST, errorBound, 1);
   checkResult(success, maxError);
 
+  fprintf(stderr, "subq_u05 : ");
+  maxError = 0;
+  cmpDenormOuterLoop_q_q(mpfr_sub, child_subq_u05, stdCheckVals);
+  checkAccuracyOuterLoop_q_q(mpfr_sub, child_subq_u05, "1e-100", "1e+100", 5 * NTEST, errorBound, 0);
+  checkAccuracyOuterLoop_q_q(mpfr_sub, child_subq_u05, "0", "Inf", 5 * NTEST, errorBound, 1);
+  checkResult(success, maxError);
+
   fprintf(stderr, "mulq_u05 : ");
   maxError = 0;
   cmpDenormOuterLoop_q_q(mpfr_mul, child_mulq_u05, stdCheckVals);
@@ -311,11 +342,50 @@ void do_test() {
   checkAccuracyOuterLoop_q(mpfr_sqrt, child_sqrtq_u05, "1e-100", "1e+100", 5 * NTEST, errorBound, 0);
   checkAccuracyOuterLoop_q(mpfr_sqrt, child_sqrtq_u05, "0", "Inf", 5 * NTEST, errorBound, 1);
   checkResult(success, maxError);
+
+  fprintf(stderr, "negq : ");
+  maxError = 0;
+  cmpDenormOuterLoop_q(mpfr_neg, child_negq, stdCheckVals);
+  checkAccuracyOuterLoop_q(mpfr_neg, child_negq, "1e-100", "1e+100", 5 * NTEST, errorBound, 0);
+  checkAccuracyOuterLoop_q(mpfr_neg, child_negq, "0", "Inf", 5 * NTEST, errorBound, 1);
+  checkResult(success, maxError);
+
+  if ((options & 2) != 0) {
+    fprintf(stderr, "strtoq : ");
+    for(int i=0;i<sizeof(stdCheckVals)/sizeof(char *);i++) {
+      Sleef_quad a0 = cast_q_str(stdCheckVals[i]);
+      Sleef_quad a1 = child_strtoq(stdCheckVals[i]);
+      if (memcmp(&a0, &a1, sizeof(Sleef_quad)) == 0) continue;
+      if (isnanf128(a0) && isnanf128(a1)) continue;
+
+      fprintf(stderr, "\narg     = %s\ntest    = %s\ncorrect = %s\n",
+	      stdCheckVals[i], sprintf128(a1), sprintf128(a0));
+      success = 0;
+      break;
+    }
+    checkResult(success, maxError);
+
+    fprintf(stderr, "qtostr : ");
+    for(int i=0;i<sizeof(stdCheckVals)/sizeof(char *);i++) {
+      Sleef_quad a0 = cast_q_str(stdCheckVals[i]);
+      char s[100];
+      child_qtostr(s, a0);
+      Sleef_quad a1 = cast_q_str(s);
+      if (memcmp(&a0, &a1, sizeof(Sleef_quad)) == 0) continue;
+      if (isnanf128(a0) && isnanf128(a1)) continue;
+
+      fprintf(stderr, "\narg     = %s\nteststr = %s\ntest    = %s\ncorrect = %s\n",
+	      stdCheckVals[i], s, sprintf128(a0), sprintf128(a1));
+      success = 0;
+      break;
+    }
+    checkResult(success, maxError);
+  }
 }
 
 int main(int argc, char **argv) {
   char *argv2[argc+2], *commandSde = NULL;
-  int i, a2s;
+  int i, a2s, options;
 
   // BUGFIX: this flush is to prevent incorrect syncing with the
   // `iut*` executable that causes failures in the CPU detection on
@@ -343,11 +413,10 @@ int main(int argc, char **argv) {
 
   {
     char str[256];
-    int u;
 
     if (readln(ctop[0], str, 255) < 1 ||
-	sscanf(str, "%d", &u) != 1 ||
-	(u & 1) == 0) {
+	sscanf(str, "%d", &options) != 1 ||
+	(options & 1) == 0) {
       if (commandSde != NULL) {
 	close(ctop[0]);
 	close(ptoc[1]);
@@ -360,8 +429,8 @@ int main(int argc, char **argv) {
 	startChild(argv2[0], argv2);
 
 	if (readln(ctop[0], str, 255) < 1) stop("Feature detection(sde, readln)");
-	if (sscanf(str, "%d", &u) != 1) stop("Feature detection(sde, sscanf)");
-	if ((u & 1) == 0) {
+	if (sscanf(str, "%d", &options) != 1) stop("Feature detection(sde, sscanf)");
+	if ((options & 1) == 0) {
 	  fprintf(stderr, "\n\nTester : *** CPU does not support the necessary feature(SDE)\n");
 	  return 0;
 	}
@@ -378,7 +447,7 @@ int main(int argc, char **argv) {
 
   fpctop = fdopen(ctop[0], "r");
   
-  do_test();
+  do_test(options);
 
   fprintf(stderr, "\n\n*** All tests passed\n");
 
diff --git a/src/quad-tester/tester2simdqp.c b/src/quad-tester/tester2simdqp.c
@@ -6,6 +6,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
+#include <string.h>
 #include <mpfr.h>
 #include <time.h>
 #include <float.h>
@@ -284,6 +285,21 @@ int main(int argc,char **argv)
       }
     }
 
+    {
+      mpfr_set_f128(frx, q0, GMP_RNDN);
+      mpfr_set_f128(fry, q1, GMP_RNDN);
+      mpfr_sub(frz, frx, fry, GMP_RNDN);
+
+      double u0 = countULPf128(t = vget(xsubq_u05(a0, a1), e), frz, 0);
+      
+      if (u0 > 0.5000000001) {
+	printf(ISANAME " sub arg=%s %s ulp=%.20g\n", sprintf128(q0), sprintf128(q1), u0);
+	printf("test = %s\n", sprintf128(t));
+	printf("corr = %s\n\n", sprintf128(mpfr_get_f128(frz, GMP_RNDN)));
+	fflush(stdout); ecnt++;
+      }
+    }
+
     {
       mpfr_set_f128(frx, q0, GMP_RNDN);
       mpfr_set_f128(fry, q1, GMP_RNDN);
@@ -327,5 +343,17 @@ int main(int argc,char **argv)
 	fflush(stdout); ecnt++;
       }
     }
+
+#ifdef ENABLE_PUREC_SCALAR
+    if ((cnt & 15) == 1) {
+      char s[64];
+      Sleef_qtostr(s, 63, a0, 10);
+      Sleef_quad q1 = vget(Sleef_strtoq(s, NULL, 10), e);
+      if (memcmp(&q0, &q1, sizeof(Sleef_quad)) != 0 && !(isnanf128(q0) && isnanf128(q1))) {
+	printf("qtostr/strtoq arg=%s\n", sprintf128(q0));
+	fflush(stdout); ecnt++;
+      }
+    }
+#endif
   }
 }
diff --git a/src/quad/qfuncproto.h b/src/quad/qfuncproto.h
@@ -22,19 +22,21 @@ typedef struct {
   funcType:
   0 : vargquad func(vargquad);
   1 : vargquad func(vargquad, vargquad);
-  2 : vargquad2 func(vargquad);   GNUABI : void func(vargquad, double *, double *);
+  2 : vargquad2 func(vargquad);
   3 : vargquad func(vargquad, vint);
   4 : vint func(vargquad);
   5 : vargquad func(vargquad, vargquad, vargquad);
-  6 : vargquad2 func(vargquad);   GNUABI : vargquad func(vargquad, double *);
+  6 : vargquad2 func(vargquad);
   7 : int func(int);
   8 : void *func(int);
  */
 
 funcSpec funcList[] = {
   { "add", 5, 2, 1, 0 },
+  { "sub", 5, 2, 1, 0 },
   { "mul", 5, 2, 1, 0 },
   { "div", 5, 2, 1, 0 },
+  { "neg", -1, 0, 0, 0 },
   { "sqrt", 5, 2, 0, 0 },
   //{ "sincos", 10, 1, 2, 0 },
   //{ "ldexp", -1, 0, 3, 0 },
diff --git a/src/quad/sleefquad_header.h.org b/src/quad/sleefquad_header.h.org
@@ -83,3 +83,7 @@ typedef union {
 
 //
 
+IMPORT Sleef_quad1 Sleef_strtoq(const char *str, char **endptr, int base);
+IMPORT void Sleef_qtostr(char *s, int n, Sleef_quad1 a, int base);
+
+//
diff --git a/src/quad/sleefsimdqp.c b/src/quad/sleefsimdqp.c

Original file line number	Diff line number	Diff line change
`@@ -43,7 +43,7 @@ pipeline {`
`43`	`43`	`'''`
`44`	`44`	`}`
`45`	`45`	`}`
`46`		`-`
	`46`	`+/*`
`47`	`47`	`stage('Intel Compiler') {`
`48`	`48`	`agent { label 'icc' }`
`49`	`49`	`steps {`
`@@ -64,7 +64,7 @@ pipeline {`
`64`	`64`	`'''`
`65`	`65`	`}`
`66`	`66`	`}`
`67`		`-`
	`67`	`+*/`
`68`	`68`	`stage('FMA4') {`
`69`	`69`	`agent { label 'fma4' }`
`70`	`70`	`steps {`