23 # or visit www.oracle.com if you need additional information or have any
24 # questions.
25 #
26
# First script argument: "true" to also generate the performance-test
# templates. Defaults to "false" when omitted so that comparisons against
# this flag never see an empty word.
generate_perf_tests=${1:-false}

# Directory that holds the per-operation *.template snippets.
TEMPLATE_FOLDER="templates/"

# Files the generated unit / perf / scalar-perf templates are appended to.
unit_output="unit_tests.template"
perf_output="perf_tests.template"
perf_scalar_output="perf_scalar_tests.template"

# Template base names. Each is combined with a "Unit-", "Kernel-", "Perf-" or
# "Perf-Scalar-" prefix and a ".template" suffix to locate a snippet file.
unary="Unary-op"
unary_masked="Unary-Masked-op"
unary_scalar="Unary-Scalar-op"
ternary="Ternary-op"
ternary_masked="Ternary-Masked-op"
ternary_scalar="Ternary-Scalar-op"
binary="Binary-op"
binary_masked="Binary-Masked-op"
binary_scalar="Binary-Scalar-op"
blend="Blend-op"
test_template="Test"
compare_template="Compare"
reduction_scalar="Reduction-Scalar-op"
reduction_scalar_min="Reduction-Scalar-Min-op"
reduction_scalar_max="Reduction-Scalar-Max-op"
reduction_scalar_masked="Reduction-Scalar-Masked-op"
reduction_scalar_min_masked="Reduction-Scalar-Masked-Min-op"
reduction_scalar_max_masked="Reduction-Scalar-Masked-Max-op"
reduction_op="Reduction-op"
reduction_op_min="Reduction-Min-op"
reduction_op_max="Reduction-Max-op"
reduction_op_masked="Reduction-Masked-op"
reduction_op_min_masked="Reduction-Masked-Min-op"
reduction_op_max_masked="Reduction-Masked-Max-op"
unary_math_template="Unary-op-math"
binary_math_template="Binary-op-math"
bool_reduction_scalar="BoolReduction-Scalar-op"
bool_reduction_template="BoolReduction-op"
88 local guard=$7
89 local masked=$8
90 local op_name=$9
91
92 if [ "x${kernel}" != "x" ]; then
93 local kernel_escaped=$(echo -e "$kernel" | tr '\n' '|')
94 sed "s/\[\[KERNEL\]\]/${kernel_escaped}/g" $filename > ${filename}.current1
95 cat ${filename}.current1 | tr '|' "\n" > ${filename}.current
96 rm -f "${filename}.current1"
97 else
98 cp $filename ${filename}.current
99 fi
100
101 # Check if we need to do multiple replacements
102 # If you want to emit for an operation using lanewise(VectorOperator.**, ..) and also using dedicated instruction (e.g. add(..)), then
103 # pass the 'test' argument as "OPERATOR_NAME+func_Name" (e.g. "ADD+add")
104 # if there is a masked version available for the operation add "withMask" to 'test' argument (e.g. "ADD+add+withMask")
105 local test_func=""
106 local withMask=""
107 local tests=($(awk -F+ '{$1=$1} 1' <<< $test))
108 if [ "${tests[1]}" != "" ]; then
109 test=${tests[0]}
110 test_func=${tests[1]}
111 withMask=${tests[2]}
112 fi
113
114 sed_prog="
115 s/\<OPTIONAL\>\(.*\)\<\\OPTIONAL\>/\1/g
116 s/\[\[TEST_TYPE\]\]/${masked}/g
117 s/\[\[TEST_OP\]\]/${op}/g
118 s/\[\[TEST_INIT\]\]/${init}/g
119 s/\[\[OP_NAME\]\]/${op_name}/g
120 "
121 sed_prog_2="$sed_prog
122 s/\[\[TEST\]\]/${test_func}/g
123 s/[.][^(]*(VectorOperators.$test_func, /.$test_func(/g
124 s/[.][^(]*(VectorOperators.$test_func,/.$test_func(/g
125 s/[.][^(]*(VectorOperators.$test_func/.$test_func(/g
126 "
127 sed_prog="
128 $sed_prog
129 s/\[\[TEST\]\]/${test}/g
130 "
131
132 # Guard the test if necessary
133 if [ "$guard" != "" ]; then
134 echo -e "#if[${guard}]\n" >> $output
135 fi
136 sed -e "$sed_prog" < ${filename}.current >> $output
137 # If we also have a dedicated function for the operation then use 2nd sed expression
138 if [[ "$filename" == *"Unit"* ]] && [ "$test_func" != "" ]; then
139 if [ "$masked" == "" ] || [ "$withMask" != "" ]; then
140 sed -e "$sed_prog_2" < ${filename}.current >> $output
141 fi
142 fi
143 if [ "$guard" != "" ]; then
144 echo -e "#end[${guard}]\n" >> $output
145 fi
146
147 rm -f ${filename}.current
148 }
149
150 function gen_op_tmpl {
151 local template=$1
152 local test=$2
153 local op=$3
154 local guard=""
155 local init=""
156 if [ $# -gt 3 ]; then
171 elif [[ $template == *"Get"* ]]; then
172 op_name="extract"
173 fi
174
175 local kernel_filename="${TEMPLATE_FOLDER}/Kernel-${template}.template"
176 local unit_filename="${TEMPLATE_FOLDER}/Unit-${template}.template"
177 if [ ! -f $unit_filename ]; then
178 # Leverage general unit code snippet if no specialization exists
179 unit_filename="${TEMPLATE_FOLDER}/Unit-${template%_*}.template"
180 echo $unit_filename
181 fi
182
183 local kernel=""
184 if [ -f $kernel_filename ]; then
185 kernel="$(cat $kernel_filename)"
186 fi
187
188 # Replace template variables in unit test files (if any)
189 replace_variables $unit_filename $unit_output "$kernel" "$test" "$op" "$init" "$guard" "$masked" "$op_name"
190
191 if [ $generate_perf_tests == true ]; then
192 # Replace template variables in performance test files (if any)
193 local perf_wrapper_filename="${TEMPLATE_FOLDER}/Perf-wrapper.template"
194 local perf_vector_filename="${TEMPLATE_FOLDER}/Perf-${template}.template"
195 local perf_scalar_filename="${TEMPLATE_FOLDER}/Perf-Scalar-${template}.template"
196
197 if [ -f $perf_vector_filename ]; then
198 replace_variables $perf_vector_filename $perf_output "$kernel" "$test" "$op" "$init" "$guard" "$masked" "$op_name"
199 elif [ -f $kernel_filename ]; then
200 replace_variables $perf_wrapper_filename $perf_output "$kernel" "$test" "$op" "$init" "$guard" "$masked" "$op_name"
201 elif [[ $template != *"-Scalar-"* ]] && [[ $template != "Get-op" ]] && [[ $template != "With-Op" ]]; then
202 echo "Warning: missing perf: $@"
203 fi
204
205 if [ -f $perf_scalar_filename ]; then
206 replace_variables $perf_scalar_filename $perf_scalar_output "$kernel" "$test" "$op" "$init" "$guard" "$masked" "$op_name"
207 elif [[ $template != *"-Scalar-"* ]] && [[ $template != "Get-op" ]] && [[ $template != "With-Op" ]]; then
208 echo "Warning: Missing PERF SCALAR: $perf_scalar_filename"
209 fi
210 fi
211 }
212
# Emit the unmasked and the masked binary-op template for one operation.
# Arguments:
#   $1  - test spec (e.g. "ADD+add+withMask")
#   $2  - operation expression (e.g. "a + b")
#   $3+ - any further arguments, forwarded unchanged to gen_op_tmpl
# Outputs: one progress line on stdout.
function gen_binary_alu_op {
  echo "Generating binary op $1 ($2)..."
  # Template names are quoted so an empty value cannot shift the arguments.
  gen_op_tmpl "$binary" "$@"
  gen_op_tmpl "$binary_masked" "$@"
}
218
# Emit the unmasked and the masked shift-by-constant template for one
# operation.
# Arguments:
#   $1  - test spec (e.g. "LSHL")
#   $2  - operation expression (e.g. "(a << b)")
#   $3+ - any further arguments, forwarded unchanged to gen_op_tmpl
# Globals: shift_template, shift_masked_template (read).
# Outputs: one progress line on stdout.
function gen_shift_cst_op {
  echo "Generating Shift constant op $1 ($2)..."
  # Quoted: if a template variable is empty, gen_op_tmpl still receives it as
  # its first argument instead of mistaking the test spec for the template.
  gen_op_tmpl "$shift_template" "$@"
  gen_op_tmpl "$shift_masked_template" "$@"
}
224
# Emit the scalar, unmasked and masked unary-op templates for one operation.
# Arguments:
#   $1  - test spec (e.g. "ABS+abs")
#   $2  - operation expression
#   $3+ - any further arguments, forwarded unchanged to gen_op_tmpl
# Outputs: one progress line on stdout.
function gen_unary_alu_op {
  echo "Generating unary op $1 ($2)..."
  # Template names are quoted so an empty value cannot shift the arguments.
  gen_op_tmpl "$unary_scalar" "$@"
  gen_op_tmpl "$unary" "$@"
  gen_op_tmpl "$unary_masked" "$@"
}
231
# Emit the scalar, unmasked and masked ternary-op templates for one operation.
# Arguments:
#   $1  - test spec (e.g. "FMA")
#   $2  - operation expression (e.g. "Math.fma(a, b, c)")
#   $3+ - any further arguments, forwarded unchanged to gen_op_tmpl
# Outputs: one progress line on stdout.
function gen_ternary_alu_op {
  echo "Generating ternary op $1 ($2)..."
  # Template names are quoted so an empty value cannot shift the arguments.
  gen_op_tmpl "$ternary_scalar" "$@"
  gen_op_tmpl "$ternary" "$@"
  gen_op_tmpl "$ternary_masked" "$@"
}
238
# Emit the unmasked and the masked binary-op template for one operation.
# Arguments:
#   $1  - test spec
#   $2  - operation expression
#   $3+ - any further arguments, forwarded unchanged to gen_op_tmpl
# Outputs: one progress line on stdout.
function gen_binary_op {
  echo "Generating binary op $1 ($2)..."
  # The scalar variant is currently disabled:
  # gen_op_tmpl "$binary_scalar" "$@"
  # Template names are quoted so an empty value cannot shift the arguments.
  gen_op_tmpl "$binary" "$@"
  gen_op_tmpl "$binary_masked" "$@"
}
245
# Emit only the unmasked binary-op template for one operation.
# Arguments:
#   $1  - test spec (e.g. "MIN+min")
#   $2  - operation expression (e.g. "Math.min(a, b)")
#   $3+ - any further arguments, forwarded unchanged to gen_op_tmpl
# Outputs: one progress line on stdout.
function gen_binary_op_no_masked {
  echo "Generating binary op $1 ($2)..."
  # The scalar variant is currently disabled:
  # gen_op_tmpl "$binary_scalar" "$@"
  # Template name is quoted so an empty value cannot shift the arguments.
  gen_op_tmpl "$binary" "$@"
}
251
# Emit the four reduction templates (scalar, vector, masked scalar, masked
# vector) for one operation.
# Arguments:
#   $1  - test spec (e.g. "ADD")
#   $2  - operation expression (e.g. "+")
#   $3+ - any further arguments, forwarded unchanged to gen_op_tmpl
# Outputs: one progress line on stdout.
function gen_reduction_op {
  echo "Generating reduction op $1 ($2)..."
  # Template names are quoted so an empty value cannot shift the arguments.
  gen_op_tmpl "$reduction_scalar" "$@"
  gen_op_tmpl "$reduction_op" "$@"
  gen_op_tmpl "$reduction_scalar_masked" "$@"
  gen_op_tmpl "$reduction_op_masked" "$@"
}
259
# Emit the four min-reduction templates (scalar, vector, masked scalar,
# masked vector) for one operation.
# Arguments:
#   $1  - test spec (e.g. "MIN")
#   $2  - operation expression (may be empty)
#   $3+ - any further arguments, forwarded unchanged to gen_op_tmpl
# Outputs: one progress line on stdout.
function gen_reduction_op_min {
  echo "Generating reduction op $1 ($2)..."
  # Template names are quoted so an empty value cannot shift the arguments.
  gen_op_tmpl "$reduction_scalar_min" "$@"
  gen_op_tmpl "$reduction_op_min" "$@"
  gen_op_tmpl "$reduction_scalar_min_masked" "$@"
  gen_op_tmpl "$reduction_op_min_masked" "$@"
}
267
268 function gen_reduction_op_max {
269 echo "Generating reduction op $1 ($2)..."
270 gen_op_tmpl $reduction_scalar_max "$@"
271 gen_op_tmpl $reduction_op_max "$@"
314 }
315
# Main generation sequence: write the unit-test header (and the perf headers
# when requested), emit one template instance per operation, then write the
# matching footers and remove leftover intermediate files.
gen_unit_header "$unit_output"

# The flag is quoted so that an empty value compares as "" instead of
# collapsing the test into `[ == true ]`.
if [ "$generate_perf_tests" == true ]; then
  gen_perf_header "$perf_output"
  gen_perf_scalar_header "$perf_scalar_output"
fi

# ALU binary ops.
# Here "ADD+add+withMask" says the VectorOperators name is "ADD", that there is also a dedicated method named 'add', and that add() is available in a masked variant too.
gen_binary_alu_op "ADD+add+withMask" "a + b"
gen_binary_alu_op "SUB+sub+withMask" "a - b"
gen_binary_alu_op "MUL+mul+withMask" "a \* b"
gen_binary_alu_op "DIV+div+withMask" "a \/ b" "FP"
gen_op_tmpl "Binary-op_bitwise-div" "DIV+div+withMask" "a \/ b" "BITWISE"
gen_op_tmpl "Binary-Masked-op_bitwise-div" "DIV+div+withMask" "a \/ b" "BITWISE"
gen_binary_alu_op "FIRST_NONZERO" "{#if[FP]?Double.doubleToLongBits}(a)!=0?a:b"
gen_binary_alu_op "AND+and" "a \& b" "BITWISE"
gen_binary_alu_op "AND_NOT" "a \& ~b" "BITWISE"
gen_binary_alu_op "OR" "a | b" "BITWISE"
# Missing: "OR_UNCHECKED"
gen_binary_alu_op "XOR" "a ^ b" "BITWISE"

# Shifts
gen_binary_alu_op "LSHL" "(a << b)" "intOrLong"
gen_binary_alu_op "LSHL" "(a << (b \& 0x7))" "byte"
gen_binary_alu_op "LSHL" "(a << (b \& 0xF))" "short"
gen_binary_alu_op "ASHR" "(a >> b)" "intOrLong"
gen_binary_alu_op "ASHR" "(a >> (b \& 0x7))" "byte"
gen_binary_alu_op "ASHR" "(a >> (b \& 0xF))" "short"
gen_binary_alu_op "LSHR" "(a >>> b)" "intOrLong"
gen_binary_alu_op "LSHR" "((a \& 0xFF) >>> (b \& 0x7))" "byte"
gen_binary_alu_op "LSHR" "((a \& 0xFFFF) >>> (b \& 0xF))" "short"
gen_shift_cst_op "LSHL" "(a << b)" "intOrLong"
gen_shift_cst_op "LSHL" "(a << (b \& 7))" "byte"
gen_shift_cst_op "LSHL" "(a << (b \& 15))" "short"
gen_shift_cst_op "LSHR" "(a >>> b)" "intOrLong"
gen_shift_cst_op "LSHR" "((a \& 0xFF) >>> (b \& 7))" "byte"
gen_shift_cst_op "LSHR" "((a \& 0xFFFF) >>> (b \& 15))" "short"
gen_shift_cst_op "ASHR" "(a >> b)" "intOrLong"
gen_shift_cst_op "ASHR" "(a >> (b \& 7))" "byte"
gen_shift_cst_op "ASHR" "(a >> (b \& 15))" "short"

# Lanewise MIN/MAX (only the unmasked binary template is generated).
gen_binary_op_no_masked "MIN+min" "Math.min(a, b)"
gen_binary_op_no_masked "MAX+max" "Math.max(a, b)"

# Reductions.
gen_reduction_op "AND" "\&" "BITWISE" "-1"
gen_reduction_op "OR" "|" "BITWISE" "0"
gen_reduction_op "XOR" "^" "BITWISE" "0"
gen_reduction_op "ADD" "+" "" "0"
gen_reduction_op "MUL" "*" "" "1"
gen_reduction_op_min "MIN" "" "" "\$Wideboxtype\$.\$MaxValue\$"
gen_reduction_op_max "MAX" "" "" "\$Wideboxtype\$.\$MinValue\$"
#gen_reduction_op "reduce_FIRST_NONZERO" "lanewise_FIRST_NONZERO" "{#if[FP]?Double.doubleToLongBits}(a)=0?a:b" "" "1"

# Boolean reductions.
gen_bool_reduction_op "anyTrue" "|" "BITWISE" "false"
gen_bool_reduction_op "allTrue" "\&" "BITWISE" "true"

# Insert
gen_with_op "withLane" "" "" ""

# Tests
gen_op_tmpl "$test_template" "IS_DEFAULT" "bits(a)==0"
gen_op_tmpl "$test_template" "IS_NEGATIVE" "bits(a)<0"
gen_op_tmpl "$test_template" "IS_FINITE" "\$Boxtype\$.isFinite(a)" "FP"
gen_op_tmpl "$test_template" "IS_NAN" "\$Boxtype\$.isNaN(a)" "FP"
gen_op_tmpl "$test_template" "IS_INFINITE" "\$Boxtype\$.isInfinite(a)" "FP"

# Compares
gen_op_tmpl "$compare_template" "LT+lt" "<"
gen_op_tmpl "$compare_template" "GT" ">"
gen_op_tmpl "$compare_template" "EQ+eq" "=="
gen_op_tmpl "$compare_template" "NE" "!="
gen_op_tmpl "$compare_template" "LE" "<="
gen_op_tmpl "$compare_template" "GE" ">="

# Blend.
gen_op_tmpl "$blend" "blend" ""

# Rearrange
gen_op_tmpl "$rearrange_template" "rearrange" ""

# Get
gen_get_op "" ""

# Broadcast
gen_op_tmpl "$broadcast_template" "broadcast" ""

# Zero
gen_op_tmpl "$zero_template" "zero" ""

# Slice
gen_op_tmpl "$slice_template" "sliceUnary" ""
gen_op_tmpl "$slice1_template" "sliceBinary" ""
gen_op_tmpl "$slice1_masked_template" "slice" ""

# Unslice
gen_op_tmpl "$unslice_template" "unsliceUnary" ""
gen_op_tmpl "$unslice1_template" "unsliceBinary" ""
gen_op_tmpl "$unslice1_masked_template" "unslice" ""

# Math
gen_op_tmpl "$unary_math_template" "SIN" "Math.sin((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "EXP" "Math.exp((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "LOG10" "Math.log10((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "EXPM1" "Math.expm1((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "COS" "Math.cos((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "TAN" "Math.tan((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "SINH" "Math.sinh((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "COSH" "Math.cosh((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "TANH" "Math.tanh((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "ASIN" "Math.asin((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "ACOS" "Math.acos((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "ATAN" "Math.atan((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "CBRT" "Math.cbrt((double)a)" "FP"
gen_op_tmpl "$binary_math_template" "HYPOT" "Math.hypot((double)a, (double)b)" "FP"
gen_op_tmpl "$binary_math_template" "POW" "Math.pow((double)a, (double)b)" "FP"
gen_op_tmpl "$binary_math_template" "ATAN2" "Math.atan2((double)a, (double)b)" "FP"

# Ternary operations.
gen_ternary_alu_op "FMA" "Math.fma(a, b, c)" "FP"
gen_ternary_alu_op "BITWISE_BLEND" "(a\&~(c))|(b\&c)" "BITWISE"

# Unary operations.
gen_unary_alu_op "NEG" "-((\$type\$)a)"
gen_unary_alu_op "ABS+abs" "Math.abs((\$type\$)a)"
gen_unary_alu_op "NOT" "~((\$type\$)a)" "BITWISE"
gen_unary_alu_op "ZOMO" "(a==0?0:-1)" "BITWISE"
gen_unary_alu_op "SQRT" "Math.sqrt((double)a)" "FP"

# Gather Scatter operations.
gen_op_tmpl "$gather_template" "gather" ""
gen_op_tmpl "$gather_masked_template" "gather" ""
gen_op_tmpl "$scatter_template" "scatter" ""
gen_op_tmpl "$scatter_masked_template" "scatter" ""

gen_unit_footer "$unit_output"

if [ "$generate_perf_tests" == true ]; then
  gen_perf_footer "$perf_output"
  gen_perf_scalar_footer "$perf_scalar_output"
fi

# Remove any intermediate *.current files left in the template directory.
rm -f templates/*.current*
|
23 # or visit www.oracle.com if you need additional information or have any
24 # questions.
25 #
26
# First script argument: "true" to also generate the performance-test
# templates. Defaults to "false" when omitted so that comparisons against
# this flag never see an empty word.
generate_perf_tests=${1:-false}

# Directory that holds the per-operation *.template snippets.
TEMPLATE_FOLDER="templates/"

# Files the generated unit / perf / scalar-perf templates are appended to.
unit_output="unit_tests.template"
perf_output="perf_tests.template"
perf_scalar_output="perf_scalar_tests.template"

# Template base names. Each is combined with a "Unit-", "Kernel-", "Perf-" or
# "Perf-Scalar-" prefix and a ".template" suffix to locate a snippet file.
unary="Unary-op"
unary_masked="Unary-Masked-op"
unary_scalar="Unary-Scalar-op"
ternary="Ternary-op"
ternary_masked="Ternary-Masked-op"
ternary_scalar="Ternary-Scalar-op"
binary="Binary-op"
binary_masked="Binary-Masked-op"
binary_broadcast="Binary-Broadcast-op"
binary_broadcast_masked="Binary-Broadcast-Masked-op"
binary_scalar="Binary-Scalar-op"
blend="Blend-op"
test_template="Test"
compare_template="Compare"
reduction_scalar="Reduction-Scalar-op"
reduction_scalar_min="Reduction-Scalar-Min-op"
reduction_scalar_max="Reduction-Scalar-Max-op"
reduction_scalar_masked="Reduction-Scalar-Masked-op"
reduction_scalar_min_masked="Reduction-Scalar-Masked-Min-op"
reduction_scalar_max_masked="Reduction-Scalar-Masked-Max-op"
reduction_op="Reduction-op"
reduction_op_min="Reduction-Min-op"
reduction_op_max="Reduction-Max-op"
reduction_op_masked="Reduction-Masked-op"
reduction_op_min_masked="Reduction-Masked-Min-op"
reduction_op_max_masked="Reduction-Masked-Max-op"
unary_math_template="Unary-op-math"
binary_math_template="Binary-op-math"
bool_reduction_scalar="BoolReduction-Scalar-op"
bool_reduction_template="BoolReduction-op"
90 local guard=$7
91 local masked=$8
92 local op_name=$9
93
94 if [ "x${kernel}" != "x" ]; then
95 local kernel_escaped=$(echo -e "$kernel" | tr '\n' '|')
96 sed "s/\[\[KERNEL\]\]/${kernel_escaped}/g" $filename > ${filename}.current1
97 cat ${filename}.current1 | tr '|' "\n" > ${filename}.current
98 rm -f "${filename}.current1"
99 else
100 cp $filename ${filename}.current
101 fi
102
103 # Check if we need to do multiple replacements
104 # If you want to emit for an operation using lanewise(VectorOperator.**, ..) and also using dedicated instruction (e.g. add(..)), then
105 # pass the 'test' argument as "OPERATOR_NAME+func_Name" (e.g. "ADD+add")
106 # if there is a masked version available for the operation add "withMask" to 'test' argument (e.g. "ADD+add+withMask")
107 local test_func=""
108 local withMask=""
109 local tests=($(awk -F+ '{$1=$1} 1' <<< $test))
110 if [ "${tests[2]}" == "withMask" ]; then
111 test=${tests[0]}
112 test_func=${tests[1]}
113 withMask=${tests[2]}
114 elif [ "${tests[1]}" == "withMask" ]; then
115 test=""
116 test_func=${tests[0]}
117 withMask=${tests[1]}
118 elif [ "${tests[1]}" != "" ]; then
119 test=${tests[0]}
120 test_func=${tests[1]}
121 fi
122
123 sed_prog="
124 s/\<OPTIONAL\>\(.*\)\<\\OPTIONAL\>/\1/g
125 s/\[\[TEST_TYPE\]\]/${masked}/g
126 s/\[\[TEST_OP\]\]/${op}/g
127 s/\[\[TEST_INIT\]\]/${init}/g
128 s/\[\[OP_NAME\]\]/${op_name}/g
129 "
130 sed_prog_2="$sed_prog
131 s/\[\[TEST\]\]/${test_func}/g
132 s/[.][^(]*(VectorOperators.$test_func, /.$test_func(/g
133 s/[.][^(]*(VectorOperators.$test_func,/.$test_func(/g
134 s/[.][^(]*(VectorOperators.$test_func/.$test_func(/g
135 "
136 sed_prog="
137 $sed_prog
138 s/\[\[TEST\]\]/${test}/g
139 "
140
141 # Guard the test if necessary
142 if [ "$guard" != "" ]; then
143 echo -e "#if[${guard}]\n" >> $output
144 fi
145 if [ "$test" != "" ]; then
146 sed -e "$sed_prog" < ${filename}.current >> $output
147 fi
148 # If we also have a dedicated function for the operation then use 2nd sed expression
149 if [[ "$filename" == *"Unit"* ]] && [ "$test_func" != "" ]; then
150 if [ "$masked" == "" ] || [ "$withMask" != "" ]; then
151 sed -e "$sed_prog_2" < ${filename}.current >> $output
152 fi
153 fi
154 if [ "$guard" != "" ]; then
155 echo -e "#end[${guard}]\n" >> $output
156 fi
157
158 rm -f ${filename}.current
159 }
160
161 function gen_op_tmpl {
162 local template=$1
163 local test=$2
164 local op=$3
165 local guard=""
166 local init=""
167 if [ $# -gt 3 ]; then
182 elif [[ $template == *"Get"* ]]; then
183 op_name="extract"
184 fi
185
186 local kernel_filename="${TEMPLATE_FOLDER}/Kernel-${template}.template"
187 local unit_filename="${TEMPLATE_FOLDER}/Unit-${template}.template"
188 if [ ! -f $unit_filename ]; then
189 # Leverage general unit code snippet if no specialization exists
190 unit_filename="${TEMPLATE_FOLDER}/Unit-${template%_*}.template"
191 echo $unit_filename
192 fi
193
194 local kernel=""
195 if [ -f $kernel_filename ]; then
196 kernel="$(cat $kernel_filename)"
197 fi
198
199 # Replace template variables in unit test files (if any)
200 replace_variables $unit_filename $unit_output "$kernel" "$test" "$op" "$init" "$guard" "$masked" "$op_name"
201
202 local gen_perf_tests=$generate_perf_tests
203 if [[ $template == *"-Broadcast-"* ]]; then
204 gen_perf_tests=false
205 fi
206 if [ $gen_perf_tests == true ]; then
207 # Replace template variables in performance test files (if any)
208 local perf_wrapper_filename="${TEMPLATE_FOLDER}/Perf-wrapper.template"
209 local perf_vector_filename="${TEMPLATE_FOLDER}/Perf-${template}.template"
210 local perf_scalar_filename="${TEMPLATE_FOLDER}/Perf-Scalar-${template}.template"
211
212 if [ -f $perf_vector_filename ]; then
213 replace_variables $perf_vector_filename $perf_output "$kernel" "$test" "$op" "$init" "$guard" "$masked" "$op_name"
214 elif [ -f $kernel_filename ]; then
215 replace_variables $perf_wrapper_filename $perf_output "$kernel" "$test" "$op" "$init" "$guard" "$masked" "$op_name"
216 elif [[ $template != *"-Scalar-"* ]] && [[ $template != "Get-op" ]] && [[ $template != "With-Op" ]]; then
217 echo "Warning: missing perf: $@"
218 fi
219
220 if [ -f $perf_scalar_filename ]; then
221 replace_variables $perf_scalar_filename $perf_scalar_output "$kernel" "$test" "$op" "$init" "$guard" "$masked" "$op_name"
222 elif [[ $template != *"-Scalar-"* ]] && [[ $template != "Get-op" ]] && [[ $template != "With-Op" ]]; then
223 echo "Warning: Missing PERF SCALAR: $perf_scalar_filename"
224 fi
225 fi
226 }
227
# Emit the unmasked and the masked binary-op template for one operation.
# Arguments:
#   $1  - test spec (e.g. "ADD+add+withMask")
#   $2  - operation expression (e.g. "a + b")
#   $3+ - any further arguments, forwarded unchanged to gen_op_tmpl
# Outputs: one progress line on stdout.
function gen_binary_alu_op {
  echo "Generating binary op $1 ($2)..."
  # Template names are quoted so an empty value cannot shift the arguments.
  gen_op_tmpl "$binary" "$@"
  gen_op_tmpl "$binary_masked" "$@"
}
233
# Emit the unmasked and the masked binary *broadcast* template for one
# operation.
# Arguments:
#   $1  - test spec (e.g. "add+withMask")
#   $2  - operation expression (e.g. "a + b")
#   $3+ - any further arguments, forwarded unchanged to gen_op_tmpl
# Outputs: one progress line on stdout.
function gen_binary_alu_bcst_op {
  echo "Generating binary broadcast op $1 ($2)..."
  # Template names are quoted so an empty value cannot shift the arguments.
  gen_op_tmpl "$binary_broadcast" "$@"
  gen_op_tmpl "$binary_broadcast_masked" "$@"
}
239
# Emit the unmasked and the masked shift-by-constant template for one
# operation.
# Arguments:
#   $1  - test spec (e.g. "LSHL")
#   $2  - operation expression (e.g. "(a << b)")
#   $3+ - any further arguments, forwarded unchanged to gen_op_tmpl
# Globals: shift_template, shift_masked_template (read).
# Outputs: one progress line on stdout.
function gen_shift_cst_op {
  echo "Generating Shift constant op $1 ($2)..."
  # Quoted: if a template variable is empty, gen_op_tmpl still receives it as
  # its first argument instead of mistaking the test spec for the template.
  gen_op_tmpl "$shift_template" "$@"
  gen_op_tmpl "$shift_masked_template" "$@"
}
245
# Emit the scalar, unmasked and masked unary-op templates for one operation.
# Arguments:
#   $1  - test spec (e.g. "NEG+neg")
#   $2  - operation expression
#   $3+ - any further arguments, forwarded unchanged to gen_op_tmpl
# Outputs: one progress line on stdout.
function gen_unary_alu_op {
  echo "Generating unary op $1 ($2)..."
  # Template names are quoted so an empty value cannot shift the arguments.
  gen_op_tmpl "$unary_scalar" "$@"
  gen_op_tmpl "$unary" "$@"
  gen_op_tmpl "$unary_masked" "$@"
}
252
# Emit the scalar, unmasked and masked ternary-op templates for one operation.
# Arguments:
#   $1  - test spec (e.g. "FMA")
#   $2  - operation expression (e.g. "Math.fma(a, b, c)")
#   $3+ - any further arguments, forwarded unchanged to gen_op_tmpl
# Outputs: one progress line on stdout.
function gen_ternary_alu_op {
  echo "Generating ternary op $1 ($2)..."
  # Template names are quoted so an empty value cannot shift the arguments.
  gen_op_tmpl "$ternary_scalar" "$@"
  gen_op_tmpl "$ternary" "$@"
  gen_op_tmpl "$ternary_masked" "$@"
}
259
# Emit the unmasked and the masked binary-op template for one operation.
# Arguments:
#   $1  - test spec
#   $2  - operation expression
#   $3+ - any further arguments, forwarded unchanged to gen_op_tmpl
# Outputs: one progress line on stdout.
function gen_binary_op {
  echo "Generating binary op $1 ($2)..."
  # The scalar variant is currently disabled:
  # gen_op_tmpl "$binary_scalar" "$@"
  # Template names are quoted so an empty value cannot shift the arguments.
  gen_op_tmpl "$binary" "$@"
  gen_op_tmpl "$binary_masked" "$@"
}
266
# Emit only the unmasked binary-op template for one operation.
# Arguments:
#   $1  - test spec (e.g. "MIN+min")
#   $2  - operation expression (e.g. "Math.min(a, b)")
#   $3+ - any further arguments, forwarded unchanged to gen_op_tmpl
# Outputs: one progress line on stdout.
function gen_binary_op_no_masked {
  echo "Generating binary op $1 ($2)..."
  # The scalar variant is currently disabled:
  # gen_op_tmpl "$binary_scalar" "$@"
  # Template name is quoted so an empty value cannot shift the arguments.
  gen_op_tmpl "$binary" "$@"
}
272
# Emit only the unmasked binary *broadcast* template for one operation.
# Arguments:
#   $1  - test spec (e.g. "MIN+min")
#   $2  - operation expression (e.g. "Math.min(a, b)")
#   $3+ - any further arguments, forwarded unchanged to gen_op_tmpl
# Outputs: one progress line on stdout.
function gen_binary_bcst_op_no_masked {
  # Message names the broadcast variant so the log can be told apart from the
  # plain binary-op generator's output.
  echo "Generating binary broadcast op $1 ($2)..."
  # Template name is quoted so an empty value cannot shift the arguments.
  gen_op_tmpl "$binary_broadcast" "$@"
}
277
# Emit the four reduction templates (scalar, vector, masked scalar, masked
# vector) for one operation.
# Arguments:
#   $1  - test spec (e.g. "ADD")
#   $2  - operation expression (e.g. "+")
#   $3+ - any further arguments, forwarded unchanged to gen_op_tmpl
# Outputs: one progress line on stdout.
function gen_reduction_op {
  echo "Generating reduction op $1 ($2)..."
  # Template names are quoted so an empty value cannot shift the arguments.
  gen_op_tmpl "$reduction_scalar" "$@"
  gen_op_tmpl "$reduction_op" "$@"
  gen_op_tmpl "$reduction_scalar_masked" "$@"
  gen_op_tmpl "$reduction_op_masked" "$@"
}
285
# Emit the four min-reduction templates (scalar, vector, masked scalar,
# masked vector) for one operation.
# Arguments:
#   $1  - test spec (e.g. "MIN")
#   $2  - operation expression (may be empty)
#   $3+ - any further arguments, forwarded unchanged to gen_op_tmpl
# Outputs: one progress line on stdout.
function gen_reduction_op_min {
  echo "Generating reduction op $1 ($2)..."
  # Template names are quoted so an empty value cannot shift the arguments.
  gen_op_tmpl "$reduction_scalar_min" "$@"
  gen_op_tmpl "$reduction_op_min" "$@"
  gen_op_tmpl "$reduction_scalar_min_masked" "$@"
  gen_op_tmpl "$reduction_op_min_masked" "$@"
}
293
294 function gen_reduction_op_max {
295 echo "Generating reduction op $1 ($2)..."
296 gen_op_tmpl $reduction_scalar_max "$@"
297 gen_op_tmpl $reduction_op_max "$@"
340 }
341
# Main generation sequence: write the unit-test header (and the perf headers
# when requested), emit one template instance per operation, then write the
# matching footers and remove leftover intermediate files.
gen_unit_header "$unit_output"

# The flag is quoted so that an empty value compares as "" instead of
# collapsing the test into `[ == true ]`.
if [ "$generate_perf_tests" == true ]; then
  gen_perf_header "$perf_output"
  gen_perf_scalar_header "$perf_scalar_output"
fi

# ALU binary ops.
# Here "ADD+add+withMask" says the VectorOperators name is "ADD", that there is also a dedicated method named 'add', and that add() is available in a masked variant too.
gen_binary_alu_op "ADD+add+withMask" "a + b"
gen_binary_alu_op "SUB+sub+withMask" "a - b"
gen_binary_alu_op "MUL+mul+withMask" "a \* b"
gen_binary_alu_op "DIV+div+withMask" "a \/ b" "FP"
gen_op_tmpl "Binary-op_bitwise-div" "DIV+div+withMask" "a \/ b" "BITWISE"
gen_op_tmpl "Binary-Masked-op_bitwise-div" "DIV+div+withMask" "a \/ b" "BITWISE"
gen_binary_alu_op "FIRST_NONZERO" "{#if[FP]?Double.doubleToLongBits}(a)!=0?a:b"
gen_binary_alu_op "AND+and" "a \& b" "BITWISE"
gen_binary_alu_op "AND_NOT" "a \& ~b" "BITWISE"
gen_binary_alu_op "OR+or" "a | b" "BITWISE"
# Missing: "OR_UNCHECKED"
gen_binary_alu_op "XOR" "a ^ b" "BITWISE"
# Generate the broadcast versions
gen_binary_alu_bcst_op "add+withMask" "a + b"
gen_binary_alu_bcst_op "sub+withMask" "a - b"
gen_binary_alu_bcst_op "mul+withMask" "a \* b"
gen_binary_alu_bcst_op "div+withMask" "a \/ b" "FP"
gen_op_tmpl "Binary-Broadcast-op_bitwise-div" "div+withMask" "a \/ b" "BITWISE"
gen_op_tmpl "Binary-Broadcast-Masked-op_bitwise-div" "div+withMask" "a \/ b" "BITWISE"
gen_binary_alu_bcst_op "OR+or" "a | b" "BITWISE"

# Shifts
gen_binary_alu_op "LSHL" "(a << b)" "intOrLong"
gen_binary_alu_op "LSHL" "(a << (b \& 0x7))" "byte"
gen_binary_alu_op "LSHL" "(a << (b \& 0xF))" "short"
gen_binary_alu_op "ASHR" "(a >> b)" "intOrLong"
gen_binary_alu_op "ASHR" "(a >> (b \& 0x7))" "byte"
gen_binary_alu_op "ASHR" "(a >> (b \& 0xF))" "short"
gen_binary_alu_op "LSHR" "(a >>> b)" "intOrLong"
gen_binary_alu_op "LSHR" "((a \& 0xFF) >>> (b \& 0x7))" "byte"
gen_binary_alu_op "LSHR" "((a \& 0xFFFF) >>> (b \& 0xF))" "short"
gen_shift_cst_op "LSHL" "(a << b)" "intOrLong"
gen_shift_cst_op "LSHL" "(a << (b \& 7))" "byte"
gen_shift_cst_op "LSHL" "(a << (b \& 15))" "short"
gen_shift_cst_op "LSHR" "(a >>> b)" "intOrLong"
gen_shift_cst_op "LSHR" "((a \& 0xFF) >>> (b \& 7))" "byte"
gen_shift_cst_op "LSHR" "((a \& 0xFFFF) >>> (b \& 15))" "short"
gen_shift_cst_op "ASHR" "(a >> b)" "intOrLong"
gen_shift_cst_op "ASHR" "(a >> (b \& 7))" "byte"
gen_shift_cst_op "ASHR" "(a >> (b \& 15))" "short"

# Lanewise MIN/MAX (only the unmasked binary and broadcast templates are generated).
gen_binary_op_no_masked "MIN+min" "Math.min(a, b)"
gen_binary_op_no_masked "MAX+max" "Math.max(a, b)"
gen_binary_bcst_op_no_masked "MIN+min" "Math.min(a, b)"
gen_binary_bcst_op_no_masked "MAX+max" "Math.max(a, b)"

# Reductions.
gen_reduction_op "AND" "\&" "BITWISE" "-1"
gen_reduction_op "OR" "|" "BITWISE" "0"
gen_reduction_op "XOR" "^" "BITWISE" "0"
gen_reduction_op "ADD" "+" "" "0"
gen_reduction_op "MUL" "*" "" "1"
gen_reduction_op_min "MIN" "" "" "\$Wideboxtype\$.\$MaxValue\$"
gen_reduction_op_max "MAX" "" "" "\$Wideboxtype\$.\$MinValue\$"
#gen_reduction_op "reduce_FIRST_NONZERO" "lanewise_FIRST_NONZERO" "{#if[FP]?Double.doubleToLongBits}(a)=0?a:b" "" "1"

# Boolean reductions.
gen_bool_reduction_op "anyTrue" "|" "BITWISE" "false"
gen_bool_reduction_op "allTrue" "\&" "BITWISE" "true"

# Insert
gen_with_op "withLane" "" "" ""

# Tests
gen_op_tmpl "$test_template" "IS_DEFAULT" "bits(a)==0"
gen_op_tmpl "$test_template" "IS_NEGATIVE" "bits(a)<0"
gen_op_tmpl "$test_template" "IS_FINITE" "\$Boxtype\$.isFinite(a)" "FP"
gen_op_tmpl "$test_template" "IS_NAN" "\$Boxtype\$.isNaN(a)" "FP"
gen_op_tmpl "$test_template" "IS_INFINITE" "\$Boxtype\$.isInfinite(a)" "FP"

# Compares
gen_op_tmpl "$compare_template" "LT+lt" "<"
gen_op_tmpl "$compare_template" "GT" ">"
gen_op_tmpl "$compare_template" "EQ+eq" "=="
gen_op_tmpl "$compare_template" "NE" "!="
gen_op_tmpl "$compare_template" "LE" "<="
gen_op_tmpl "$compare_template" "GE" ">="

# Blend.
gen_op_tmpl "$blend" "blend" ""

# Rearrange
gen_op_tmpl "$rearrange_template" "rearrange" ""

# Get
gen_get_op "lane" ""

# Broadcast
gen_op_tmpl "$broadcast_template" "broadcast" ""

# Zero
gen_op_tmpl "$zero_template" "zero" ""

# Slice
gen_op_tmpl "$slice_template" "sliceUnary" ""
gen_op_tmpl "$slice1_template" "sliceBinary" ""
gen_op_tmpl "$slice1_masked_template" "slice" ""

# Unslice
gen_op_tmpl "$unslice_template" "unsliceUnary" ""
gen_op_tmpl "$unslice1_template" "unsliceBinary" ""
gen_op_tmpl "$unslice1_masked_template" "unslice" ""

# Math
gen_op_tmpl "$unary_math_template" "SIN" "Math.sin((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "EXP" "Math.exp((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "LOG10" "Math.log10((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "EXPM1" "Math.expm1((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "COS" "Math.cos((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "TAN" "Math.tan((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "SINH" "Math.sinh((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "COSH" "Math.cosh((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "TANH" "Math.tanh((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "ASIN" "Math.asin((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "ACOS" "Math.acos((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "ATAN" "Math.atan((double)a)" "FP"
gen_op_tmpl "$unary_math_template" "CBRT" "Math.cbrt((double)a)" "FP"
gen_op_tmpl "$binary_math_template" "HYPOT" "Math.hypot((double)a, (double)b)" "FP"
gen_op_tmpl "$binary_math_template" "POW" "Math.pow((double)a, (double)b)" "FP"
gen_op_tmpl "$binary_math_template" "ATAN2" "Math.atan2((double)a, (double)b)" "FP"

# Ternary operations.
gen_ternary_alu_op "FMA" "Math.fma(a, b, c)" "FP"
gen_ternary_alu_op "BITWISE_BLEND" "(a\&~(c))|(b\&c)" "BITWISE"

# Unary operations.
gen_unary_alu_op "NEG+neg" "-((\$type\$)a)"
gen_unary_alu_op "ABS+abs" "Math.abs((\$type\$)a)"
gen_unary_alu_op "NOT+not" "~((\$type\$)a)" "BITWISE"
gen_unary_alu_op "ZOMO" "(a==0?0:-1)" "BITWISE"
gen_unary_alu_op "SQRT" "Math.sqrt((double)a)" "FP"

# Gather Scatter operations.
gen_op_tmpl "$gather_template" "gather" ""
gen_op_tmpl "$gather_masked_template" "gather" ""
gen_op_tmpl "$scatter_template" "scatter" ""
gen_op_tmpl "$scatter_masked_template" "scatter" ""

gen_unit_footer "$unit_output"

if [ "$generate_perf_tests" == true ]; then
  gen_perf_footer "$perf_output"
  gen_perf_scalar_footer "$perf_scalar_output"
fi

# Remove any intermediate *.current files left in the template directory.
rm -f templates/*.current*
|