1 /*
2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
157 void mlib_ImageConvMxNMulAdd_F32(mlib_f32 *dst,
158 const mlib_f32 *src,
159 const mlib_d64 *kernel,
160 mlib_s32 n,
161 mlib_s32 m,
162 mlib_s32 nch,
163 mlib_s32 dnch)
164 {
165 mlib_f32 *hdst1 = dst + dnch;
166 mlib_s32 i, j;
167
168 for (j = 0; j < m - 2; j += 3, src += 3 * nch, kernel += 3) {
169 const mlib_f32 *src2 = src + 2 * nch;
170 mlib_f32 hval0 = (mlib_f32) kernel[0];
171 mlib_f32 hval1 = (mlib_f32) kernel[1];
172 mlib_f32 hval2 = (mlib_f32) kernel[2];
173 mlib_f32 val0 = src[0];
174 mlib_f32 val1 = src[nch];
175 mlib_f32 hdvl = dst[0];
176
177 #ifdef __SUNPRO_C
178 #pragma pipeloop(0)
179 #endif /* __SUNPRO_C */
180 for (i = 0; i < n; i++) {
181 mlib_f32 hdvl0 = val0 * hval0 + hdvl;
182 mlib_f32 val2 = src2[i * nch];
183
184 hdvl = hdst1[i * dnch];
185 hdvl0 += val1 * hval1;
186 hdvl0 += val2 * hval2;
187 val0 = val1;
188 val1 = val2;
189
190 dst[i * dnch] = hdvl0;
191 }
192 }
193
194 if (j < m - 1) {
195 const mlib_f32 *src2 = src + 2 * nch;
196 mlib_f32 hval0 = (mlib_f32) kernel[0];
197 mlib_f32 hval1 = (mlib_f32) kernel[1];
198 mlib_f32 val0 = src[0];
199 mlib_f32 val1 = src[nch];
200 mlib_f32 hdvl = dst[0];
201 #ifdef __SUNPRO_C
202 #pragma pipeloop(0)
203 #endif /* __SUNPRO_C */
204 for (i = 0; i < n; i++) {
205 mlib_f32 hdvl0 = val0 * hval0 + hdvl;
206 mlib_f32 val2 = src2[i * nch];
207
208 hdvl = hdst1[i * dnch];
209 hdvl0 += val1 * hval1;
210 val0 = val1;
211 val1 = val2;
212
213 dst[i * dnch] = hdvl0;
214 }
215
216 }
217 else if (j < m) {
218 const mlib_f32 *src2 = src + 2 * nch;
219 mlib_f32 hval0 = (mlib_f32) kernel[0];
220 mlib_f32 val0 = src[0];
221 mlib_f32 val1 = src[nch];
222 mlib_f32 hdvl = dst[0];
223
224 #ifdef __SUNPRO_C
225 #pragma pipeloop(0)
226 #endif /* __SUNPRO_C */
227 for (i = 0; i < n; i++) {
228 mlib_f32 hdvl0 = val0 * hval0 + hdvl;
229 mlib_f32 val2 = src2[i * nch];
230
231 hdvl = hdst1[i * dnch];
232 val0 = val1;
233 val1 = val2;
234
235 dst[i * dnch] = hdvl0;
236 }
237 }
238 }
239
240 /***************************************************************/
241 void mlib_ImageConvMxNF322F32_ext(mlib_f32 *dst,
242 const mlib_f32 *src,
243 mlib_s32 n,
244 mlib_s32 nch,
245 mlib_s32 dx_l,
246 mlib_s32 dx_r)
247 {
248 mlib_s32 i;
249 mlib_f32 val = src[0];
250
251 for (i = 0; i < dx_l; i++)
252 dst[i] = val;
253 #ifdef __SUNPRO_C
254 #pragma pipeloop(0)
255 #endif /* __SUNPRO_C */
256 for (; i < n - dx_r; i++)
257 dst[i] = src[nch * (i - dx_l)];
258 val = dst[n - dx_r - 1];
259 for (; i < n; i++)
260 dst[i] = val;
261 }
262
263 /***************************************************************/
264 mlib_status mlib_convMxNext_f32(mlib_image *dst,
265 const mlib_image *src,
266 const mlib_d64 *kernel,
267 mlib_s32 m,
268 mlib_s32 n,
269 mlib_s32 dx_l,
270 mlib_s32 dx_r,
271 mlib_s32 dy_t,
272 mlib_s32 dy_b,
273 mlib_s32 cmask)
274 {
275 mlib_d64 dspace[1024], *dsa = dspace;
332 mlib_s32 m,
333 mlib_s32 nch,
334 mlib_s32 dnch)
335 {
336 mlib_f32 *hdst1 = hdst + dnch, *vdst1 = vdst + dnch;
337 mlib_s32 i, j;
338
339 for (j = 0; j < m - 2; j += 3, src += 3 * nch, hfilter += 3, vfilter += 3) {
340 mlib_f32 *src2 = src + 2 * nch;
341 mlib_f32 hval0 = (mlib_f32) hfilter[0];
342 mlib_f32 vval0 = (mlib_f32) vfilter[0];
343 mlib_f32 hval1 = (mlib_f32) hfilter[1];
344 mlib_f32 vval1 = (mlib_f32) vfilter[1];
345 mlib_f32 hval2 = (mlib_f32) hfilter[2];
346 mlib_f32 vval2 = (mlib_f32) vfilter[2];
347 mlib_f32 val0 = src[0];
348 mlib_f32 val1 = src[nch];
349 mlib_f32 hdvl = hdst[0];
350 mlib_f32 vdvl = vdst[0];
351
352 #ifdef __SUNPRO_C
353 #pragma pipeloop(0)
354 #endif /* __SUNPRO_C */
355 for (i = 0; i < n; i++) {
356 mlib_f32 hdvl0 = val0 * hval0 + hdvl;
357 mlib_f32 vdvl0 = val0 * vval0 + vdvl;
358 mlib_f32 val2 = src2[i * nch];
359
360 hdvl = hdst1[i * dnch];
361 vdvl = vdst1[i * dnch];
362 hdvl0 += val1 * hval1;
363 vdvl0 += val1 * vval1;
364 hdvl0 += val2 * hval2;
365 vdvl0 += val2 * vval2;
366 val0 = val1;
367 val1 = val2;
368
369 hdst[i * dnch] = hdvl0;
370 vdst[i * dnch] = vdvl0;
371 }
372 }
373
374 if (j < m - 1) {
375 mlib_f32 *src2 = src + 2 * nch;
376 mlib_f32 hval0 = (mlib_f32) hfilter[0];
377 mlib_f32 vval0 = (mlib_f32) vfilter[0];
378 mlib_f32 hval1 = (mlib_f32) hfilter[1];
379 mlib_f32 vval1 = (mlib_f32) vfilter[1];
380 mlib_f32 val0 = src[0];
381 mlib_f32 val1 = src[nch];
382 mlib_f32 hdvl = hdst[0];
383 mlib_f32 vdvl = vdst[0];
384
385 #ifdef __SUNPRO_C
386 #pragma pipeloop(0)
387 #endif /* __SUNPRO_C */
388 for (i = 0; i < n; i++) {
389 mlib_f32 hdvl0 = val0 * hval0 + hdvl;
390 mlib_f32 vdvl0 = val0 * vval0 + vdvl;
391 mlib_f32 val2 = src2[i * nch];
392
393 hdvl = hdst1[i * dnch];
394 vdvl = vdst1[i * dnch];
395 hdvl0 += val1 * hval1;
396 vdvl0 += val1 * vval1;
397 val0 = val1;
398 val1 = val2;
399
400 hdst[i * dnch] = hdvl0;
401 vdst[i * dnch] = vdvl0;
402 }
403
404 }
405 else if (j < m) {
406 mlib_f32 *src2 = src + 2 * nch;
407 mlib_f32 hval0 = (mlib_f32) hfilter[0];
408 mlib_f32 vval0 = (mlib_f32) vfilter[0];
409 mlib_f32 val0 = src[0];
410 mlib_f32 val1 = src[nch];
411 mlib_f32 hdvl = hdst[0];
412 mlib_f32 vdvl = vdst[0];
413
414 #ifdef __SUNPRO_C
415 #pragma pipeloop(0)
416 #endif /* __SUNPRO_C */
417 for (i = 0; i < n; i++) {
418 mlib_f32 hdvl0 = val0 * hval0 + hdvl;
419 mlib_f32 vdvl0 = val0 * vval0 + vdvl;
420 mlib_f32 val2 = src2[i * nch];
421
422 hdvl = hdst1[i * dnch];
423 vdvl = vdst1[i * dnch];
424 val0 = val1;
425 val1 = val2;
426
427 hdst[i * dnch] = hdvl0;
428 vdst[i * dnch] = vdvl0;
429 }
430 }
431 }
432
433 /***************************************************************/
434 void mlib_ImageConvMxNMulAdd2_D64(mlib_d64 *hdst,
435 mlib_d64 *vdst,
436 const mlib_d64 *src,
440 mlib_s32 m,
441 mlib_s32 nch,
442 mlib_s32 dnch)
443 {
444 mlib_d64 *hdst1 = hdst + dnch, *vdst1 = vdst + dnch;
445 mlib_s32 i, j;
446
447 for (j = 0; j < m - 2; j += 3, src += 3 * nch, hfilter += 3, vfilter += 3) {
448 mlib_d64 *src2 = src + 2 * nch;
449 mlib_d64 hval0 = hfilter[0];
450 mlib_d64 vval0 = vfilter[0];
451 mlib_d64 hval1 = hfilter[1];
452 mlib_d64 vval1 = vfilter[1];
453 mlib_d64 hval2 = hfilter[2];
454 mlib_d64 vval2 = vfilter[2];
455 mlib_d64 val0 = src[0];
456 mlib_d64 val1 = src[nch];
457 mlib_d64 hdvl = hdst[0];
458 mlib_d64 vdvl = vdst[0];
459
460 #ifdef __SUNPRO_C
461 #pragma pipeloop(0)
462 #endif /* __SUNPRO_C */
463 for (i = 0; i < n; i++) {
464 mlib_d64 hdvl0 = val0 * hval0 + hdvl;
465 mlib_d64 vdvl0 = val0 * vval0 + vdvl;
466 mlib_d64 val2 = src2[i * nch];
467
468 hdvl = hdst1[i * dnch];
469 vdvl = vdst1[i * dnch];
470 hdvl0 += val1 * hval1;
471 vdvl0 += val1 * vval1;
472 hdvl0 += val2 * hval2;
473 vdvl0 += val2 * vval2;
474 val0 = val1;
475 val1 = val2;
476
477 hdst[i * dnch] = hdvl0;
478 vdst[i * dnch] = vdvl0;
479 }
480 }
481
482 if (j < m - 1) {
483 mlib_d64 *src2 = src + 2 * nch;
484 mlib_d64 hval0 = hfilter[0];
485 mlib_d64 vval0 = vfilter[0];
486 mlib_d64 hval1 = hfilter[1];
487 mlib_d64 vval1 = vfilter[1];
488 mlib_d64 val0 = src[0];
489 mlib_d64 val1 = src[nch];
490 mlib_d64 hdvl = hdst[0];
491 mlib_d64 vdvl = vdst[0];
492
493 #ifdef __SUNPRO_C
494 #pragma pipeloop(0)
495 #endif /* __SUNPRO_C */
496 for (i = 0; i < n; i++) {
497 mlib_d64 hdvl0 = val0 * hval0 + hdvl;
498 mlib_d64 vdvl0 = val0 * vval0 + vdvl;
499 mlib_d64 val2 = src2[i * nch];
500
501 hdvl = hdst1[i * dnch];
502 vdvl = vdst1[i * dnch];
503 hdvl0 += val1 * hval1;
504 vdvl0 += val1 * vval1;
505 val0 = val1;
506 val1 = val2;
507
508 hdst[i * dnch] = hdvl0;
509 vdst[i * dnch] = vdvl0;
510 }
511
512 }
513 else if (j < m) {
514 mlib_d64 *src2 = src + 2 * nch;
515 mlib_d64 hval0 = hfilter[0];
516 mlib_d64 vval0 = vfilter[0];
517 mlib_d64 val0 = src[0];
518 mlib_d64 val1 = src[nch];
519 mlib_d64 hdvl = hdst[0];
520 mlib_d64 vdvl = vdst[0];
521
522 #ifdef __SUNPRO_C
523 #pragma pipeloop(0)
524 #endif /* __SUNPRO_C */
525 for (i = 0; i < n; i++) {
526 mlib_d64 hdvl0 = val0 * hval0 + hdvl;
527 mlib_d64 vdvl0 = val0 * vval0 + vdvl;
528 mlib_d64 val2 = src2[i * nch];
529
530 hdvl = hdst1[i * dnch];
531 vdvl = vdst1[i * dnch];
532 val0 = val1;
533 val1 = val2;
534
535 hdst[i * dnch] = hdvl0;
536 vdst[i * dnch] = vdvl0;
537 }
538 }
539 }
540
541 #endif /* 0 */
542
543 /***************************************************************/
544 void mlib_ImageConvMxNMulAdd_D64(mlib_d64 *dst,
545 const mlib_d64 *src,
546 const mlib_d64 *kernel,
547 mlib_s32 n,
548 mlib_s32 m,
549 mlib_s32 nch,
550 mlib_s32 dnch)
551 {
552 mlib_d64 *hdst1 = dst + dnch;
553 mlib_s32 i, j;
554
555 for (j = 0; j < m - 2; j += 3, src += 3 * nch, kernel += 3) {
556 const mlib_d64 *src2 = src + 2 * nch;
557 mlib_d64 hval0 = kernel[0];
558 mlib_d64 hval1 = kernel[1];
559 mlib_d64 hval2 = kernel[2];
560 mlib_d64 val0 = src[0];
561 mlib_d64 val1 = src[nch];
562 mlib_d64 hdvl = dst[0];
563
564 #ifdef __SUNPRO_C
565 #pragma pipeloop(0)
566 #endif /* __SUNPRO_C */
567 for (i = 0; i < n; i++) {
568 mlib_d64 hdvl0 = val0 * hval0 + hdvl;
569 mlib_d64 val2 = src2[i * nch];
570
571 hdvl = hdst1[i * dnch];
572 hdvl0 += val1 * hval1;
573 hdvl0 += val2 * hval2;
574 val0 = val1;
575 val1 = val2;
576
577 dst[i * dnch] = hdvl0;
578 }
579 }
580
581 if (j < m - 1) {
582 const mlib_d64 *src2 = src + 2 * nch;
583 mlib_d64 hval0 = kernel[0];
584 mlib_d64 hval1 = kernel[1];
585 mlib_d64 val0 = src[0];
586 mlib_d64 val1 = src[nch];
587 mlib_d64 hdvl = dst[0];
588
589 #ifdef __SUNPRO_C
590 #pragma pipeloop(0)
591 #endif /* __SUNPRO_C */
592 for (i = 0; i < n; i++) {
593 mlib_d64 hdvl0 = val0 * hval0 + hdvl;
594 mlib_d64 val2 = src2[i * nch];
595
596 hdvl = hdst1[i * dnch];
597 hdvl0 += val1 * hval1;
598 val0 = val1;
599 val1 = val2;
600
601 dst[i * dnch] = hdvl0;
602 }
603
604 }
605 else if (j < m) {
606 const mlib_d64 *src2 = src + 2 * nch;
607 mlib_d64 hval0 = kernel[0];
608 mlib_d64 val0 = src[0];
609 mlib_d64 val1 = src[nch];
610 mlib_d64 hdvl = dst[0];
611
612 #ifdef __SUNPRO_C
613 #pragma pipeloop(0)
614 #endif /* __SUNPRO_C */
615 for (i = 0; i < n; i++) {
616 mlib_d64 hdvl0 = val0 * hval0 + hdvl;
617 mlib_d64 val2 = src2[i * nch];
618
619 hdvl = hdst1[i * dnch];
620 val0 = val1;
621 val1 = val2;
622
623 dst[i * dnch] = hdvl0;
624 }
625 }
626 }
627
628 /***************************************************************/
629 void mlib_ImageConvMxND642D64_ext(mlib_d64 *dst,
630 const mlib_d64 *src,
631 mlib_s32 n,
632 mlib_s32 nch,
633 mlib_s32 dx_l,
634 mlib_s32 dx_r)
635 {
636 mlib_s32 i;
637 mlib_d64 val = src[0];
638
639 for (i = 0; i < dx_l; i++)
640 dst[i] = val;
641 #ifdef __SUNPRO_C
642 #pragma pipeloop(0)
643 #endif /* __SUNPRO_C */
644 for (; i < n - dx_r; i++)
645 dst[i] = src[nch * (i - dx_l)];
646 val = dst[n - dx_r - 1];
647 for (; i < n; i++)
648 dst[i] = val;
649 }
650
651 /***************************************************************/
652 mlib_status mlib_convMxNext_d64(mlib_image *dst,
653 const mlib_image *src,
654 const mlib_d64 *kernel,
655 mlib_s32 m,
656 mlib_s32 n,
657 mlib_s32 dx_l,
658 mlib_s32 dx_r,
659 mlib_s32 dy_t,
660 mlib_s32 dy_b,
661 mlib_s32 cmask)
662 {
663 mlib_d64 dspace[1024], *dsa = dspace;
|
1 /*
2 * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
157 void mlib_ImageConvMxNMulAdd_F32(mlib_f32 *dst,
158 const mlib_f32 *src,
159 const mlib_d64 *kernel,
160 mlib_s32 n,
161 mlib_s32 m,
162 mlib_s32 nch,
163 mlib_s32 dnch)
164 {
165 mlib_f32 *hdst1 = dst + dnch;
166 mlib_s32 i, j;
167
168 for (j = 0; j < m - 2; j += 3, src += 3 * nch, kernel += 3) {
169 const mlib_f32 *src2 = src + 2 * nch;
170 mlib_f32 hval0 = (mlib_f32) kernel[0];
171 mlib_f32 hval1 = (mlib_f32) kernel[1];
172 mlib_f32 hval2 = (mlib_f32) kernel[2];
173 mlib_f32 val0 = src[0];
174 mlib_f32 val1 = src[nch];
175 mlib_f32 hdvl = dst[0];
176
177 for (i = 0; i < n; i++) {
178 mlib_f32 hdvl0 = val0 * hval0 + hdvl;
179 mlib_f32 val2 = src2[i * nch];
180
181 hdvl = hdst1[i * dnch];
182 hdvl0 += val1 * hval1;
183 hdvl0 += val2 * hval2;
184 val0 = val1;
185 val1 = val2;
186
187 dst[i * dnch] = hdvl0;
188 }
189 }
190
191 if (j < m - 1) {
192 const mlib_f32 *src2 = src + 2 * nch;
193 mlib_f32 hval0 = (mlib_f32) kernel[0];
194 mlib_f32 hval1 = (mlib_f32) kernel[1];
195 mlib_f32 val0 = src[0];
196 mlib_f32 val1 = src[nch];
197 mlib_f32 hdvl = dst[0];
198 for (i = 0; i < n; i++) {
199 mlib_f32 hdvl0 = val0 * hval0 + hdvl;
200 mlib_f32 val2 = src2[i * nch];
201
202 hdvl = hdst1[i * dnch];
203 hdvl0 += val1 * hval1;
204 val0 = val1;
205 val1 = val2;
206
207 dst[i * dnch] = hdvl0;
208 }
209
210 }
211 else if (j < m) {
212 const mlib_f32 *src2 = src + 2 * nch;
213 mlib_f32 hval0 = (mlib_f32) kernel[0];
214 mlib_f32 val0 = src[0];
215 mlib_f32 val1 = src[nch];
216 mlib_f32 hdvl = dst[0];
217
218 for (i = 0; i < n; i++) {
219 mlib_f32 hdvl0 = val0 * hval0 + hdvl;
220 mlib_f32 val2 = src2[i * nch];
221
222 hdvl = hdst1[i * dnch];
223 val0 = val1;
224 val1 = val2;
225
226 dst[i * dnch] = hdvl0;
227 }
228 }
229 }
230
231 /***************************************************************/
232 void mlib_ImageConvMxNF322F32_ext(mlib_f32 *dst,
233 const mlib_f32 *src,
234 mlib_s32 n,
235 mlib_s32 nch,
236 mlib_s32 dx_l,
237 mlib_s32 dx_r)
238 {
239 mlib_s32 i;
240 mlib_f32 val = src[0];
241
242 for (i = 0; i < dx_l; i++)
243 dst[i] = val;
244 for (; i < n - dx_r; i++)
245 dst[i] = src[nch * (i - dx_l)];
246 val = dst[n - dx_r - 1];
247 for (; i < n; i++)
248 dst[i] = val;
249 }
250
251 /***************************************************************/
252 mlib_status mlib_convMxNext_f32(mlib_image *dst,
253 const mlib_image *src,
254 const mlib_d64 *kernel,
255 mlib_s32 m,
256 mlib_s32 n,
257 mlib_s32 dx_l,
258 mlib_s32 dx_r,
259 mlib_s32 dy_t,
260 mlib_s32 dy_b,
261 mlib_s32 cmask)
262 {
263 mlib_d64 dspace[1024], *dsa = dspace;
320 mlib_s32 m,
321 mlib_s32 nch,
322 mlib_s32 dnch)
323 {
324 mlib_f32 *hdst1 = hdst + dnch, *vdst1 = vdst + dnch;
325 mlib_s32 i, j;
326
327 for (j = 0; j < m - 2; j += 3, src += 3 * nch, hfilter += 3, vfilter += 3) {
328 mlib_f32 *src2 = src + 2 * nch;
329 mlib_f32 hval0 = (mlib_f32) hfilter[0];
330 mlib_f32 vval0 = (mlib_f32) vfilter[0];
331 mlib_f32 hval1 = (mlib_f32) hfilter[1];
332 mlib_f32 vval1 = (mlib_f32) vfilter[1];
333 mlib_f32 hval2 = (mlib_f32) hfilter[2];
334 mlib_f32 vval2 = (mlib_f32) vfilter[2];
335 mlib_f32 val0 = src[0];
336 mlib_f32 val1 = src[nch];
337 mlib_f32 hdvl = hdst[0];
338 mlib_f32 vdvl = vdst[0];
339
340 for (i = 0; i < n; i++) {
341 mlib_f32 hdvl0 = val0 * hval0 + hdvl;
342 mlib_f32 vdvl0 = val0 * vval0 + vdvl;
343 mlib_f32 val2 = src2[i * nch];
344
345 hdvl = hdst1[i * dnch];
346 vdvl = vdst1[i * dnch];
347 hdvl0 += val1 * hval1;
348 vdvl0 += val1 * vval1;
349 hdvl0 += val2 * hval2;
350 vdvl0 += val2 * vval2;
351 val0 = val1;
352 val1 = val2;
353
354 hdst[i * dnch] = hdvl0;
355 vdst[i * dnch] = vdvl0;
356 }
357 }
358
359 if (j < m - 1) {
360 mlib_f32 *src2 = src + 2 * nch;
361 mlib_f32 hval0 = (mlib_f32) hfilter[0];
362 mlib_f32 vval0 = (mlib_f32) vfilter[0];
363 mlib_f32 hval1 = (mlib_f32) hfilter[1];
364 mlib_f32 vval1 = (mlib_f32) vfilter[1];
365 mlib_f32 val0 = src[0];
366 mlib_f32 val1 = src[nch];
367 mlib_f32 hdvl = hdst[0];
368 mlib_f32 vdvl = vdst[0];
369
370 for (i = 0; i < n; i++) {
371 mlib_f32 hdvl0 = val0 * hval0 + hdvl;
372 mlib_f32 vdvl0 = val0 * vval0 + vdvl;
373 mlib_f32 val2 = src2[i * nch];
374
375 hdvl = hdst1[i * dnch];
376 vdvl = vdst1[i * dnch];
377 hdvl0 += val1 * hval1;
378 vdvl0 += val1 * vval1;
379 val0 = val1;
380 val1 = val2;
381
382 hdst[i * dnch] = hdvl0;
383 vdst[i * dnch] = vdvl0;
384 }
385
386 }
387 else if (j < m) {
388 mlib_f32 *src2 = src + 2 * nch;
389 mlib_f32 hval0 = (mlib_f32) hfilter[0];
390 mlib_f32 vval0 = (mlib_f32) vfilter[0];
391 mlib_f32 val0 = src[0];
392 mlib_f32 val1 = src[nch];
393 mlib_f32 hdvl = hdst[0];
394 mlib_f32 vdvl = vdst[0];
395
396 for (i = 0; i < n; i++) {
397 mlib_f32 hdvl0 = val0 * hval0 + hdvl;
398 mlib_f32 vdvl0 = val0 * vval0 + vdvl;
399 mlib_f32 val2 = src2[i * nch];
400
401 hdvl = hdst1[i * dnch];
402 vdvl = vdst1[i * dnch];
403 val0 = val1;
404 val1 = val2;
405
406 hdst[i * dnch] = hdvl0;
407 vdst[i * dnch] = vdvl0;
408 }
409 }
410 }
411
412 /***************************************************************/
413 void mlib_ImageConvMxNMulAdd2_D64(mlib_d64 *hdst,
414 mlib_d64 *vdst,
415 const mlib_d64 *src,
419 mlib_s32 m,
420 mlib_s32 nch,
421 mlib_s32 dnch)
422 {
423 mlib_d64 *hdst1 = hdst + dnch, *vdst1 = vdst + dnch;
424 mlib_s32 i, j;
425
426 for (j = 0; j < m - 2; j += 3, src += 3 * nch, hfilter += 3, vfilter += 3) {
427 mlib_d64 *src2 = src + 2 * nch;
428 mlib_d64 hval0 = hfilter[0];
429 mlib_d64 vval0 = vfilter[0];
430 mlib_d64 hval1 = hfilter[1];
431 mlib_d64 vval1 = vfilter[1];
432 mlib_d64 hval2 = hfilter[2];
433 mlib_d64 vval2 = vfilter[2];
434 mlib_d64 val0 = src[0];
435 mlib_d64 val1 = src[nch];
436 mlib_d64 hdvl = hdst[0];
437 mlib_d64 vdvl = vdst[0];
438
439 for (i = 0; i < n; i++) {
440 mlib_d64 hdvl0 = val0 * hval0 + hdvl;
441 mlib_d64 vdvl0 = val0 * vval0 + vdvl;
442 mlib_d64 val2 = src2[i * nch];
443
444 hdvl = hdst1[i * dnch];
445 vdvl = vdst1[i * dnch];
446 hdvl0 += val1 * hval1;
447 vdvl0 += val1 * vval1;
448 hdvl0 += val2 * hval2;
449 vdvl0 += val2 * vval2;
450 val0 = val1;
451 val1 = val2;
452
453 hdst[i * dnch] = hdvl0;
454 vdst[i * dnch] = vdvl0;
455 }
456 }
457
458 if (j < m - 1) {
459 mlib_d64 *src2 = src + 2 * nch;
460 mlib_d64 hval0 = hfilter[0];
461 mlib_d64 vval0 = vfilter[0];
462 mlib_d64 hval1 = hfilter[1];
463 mlib_d64 vval1 = vfilter[1];
464 mlib_d64 val0 = src[0];
465 mlib_d64 val1 = src[nch];
466 mlib_d64 hdvl = hdst[0];
467 mlib_d64 vdvl = vdst[0];
468
469 for (i = 0; i < n; i++) {
470 mlib_d64 hdvl0 = val0 * hval0 + hdvl;
471 mlib_d64 vdvl0 = val0 * vval0 + vdvl;
472 mlib_d64 val2 = src2[i * nch];
473
474 hdvl = hdst1[i * dnch];
475 vdvl = vdst1[i * dnch];
476 hdvl0 += val1 * hval1;
477 vdvl0 += val1 * vval1;
478 val0 = val1;
479 val1 = val2;
480
481 hdst[i * dnch] = hdvl0;
482 vdst[i * dnch] = vdvl0;
483 }
484
485 }
486 else if (j < m) {
487 mlib_d64 *src2 = src + 2 * nch;
488 mlib_d64 hval0 = hfilter[0];
489 mlib_d64 vval0 = vfilter[0];
490 mlib_d64 val0 = src[0];
491 mlib_d64 val1 = src[nch];
492 mlib_d64 hdvl = hdst[0];
493 mlib_d64 vdvl = vdst[0];
494
495 for (i = 0; i < n; i++) {
496 mlib_d64 hdvl0 = val0 * hval0 + hdvl;
497 mlib_d64 vdvl0 = val0 * vval0 + vdvl;
498 mlib_d64 val2 = src2[i * nch];
499
500 hdvl = hdst1[i * dnch];
501 vdvl = vdst1[i * dnch];
502 val0 = val1;
503 val1 = val2;
504
505 hdst[i * dnch] = hdvl0;
506 vdst[i * dnch] = vdvl0;
507 }
508 }
509 }
510
511 #endif /* 0 */
512
513 /***************************************************************/
514 void mlib_ImageConvMxNMulAdd_D64(mlib_d64 *dst,
515 const mlib_d64 *src,
516 const mlib_d64 *kernel,
517 mlib_s32 n,
518 mlib_s32 m,
519 mlib_s32 nch,
520 mlib_s32 dnch)
521 {
522 mlib_d64 *hdst1 = dst + dnch;
523 mlib_s32 i, j;
524
525 for (j = 0; j < m - 2; j += 3, src += 3 * nch, kernel += 3) {
526 const mlib_d64 *src2 = src + 2 * nch;
527 mlib_d64 hval0 = kernel[0];
528 mlib_d64 hval1 = kernel[1];
529 mlib_d64 hval2 = kernel[2];
530 mlib_d64 val0 = src[0];
531 mlib_d64 val1 = src[nch];
532 mlib_d64 hdvl = dst[0];
533
534 for (i = 0; i < n; i++) {
535 mlib_d64 hdvl0 = val0 * hval0 + hdvl;
536 mlib_d64 val2 = src2[i * nch];
537
538 hdvl = hdst1[i * dnch];
539 hdvl0 += val1 * hval1;
540 hdvl0 += val2 * hval2;
541 val0 = val1;
542 val1 = val2;
543
544 dst[i * dnch] = hdvl0;
545 }
546 }
547
548 if (j < m - 1) {
549 const mlib_d64 *src2 = src + 2 * nch;
550 mlib_d64 hval0 = kernel[0];
551 mlib_d64 hval1 = kernel[1];
552 mlib_d64 val0 = src[0];
553 mlib_d64 val1 = src[nch];
554 mlib_d64 hdvl = dst[0];
555
556 for (i = 0; i < n; i++) {
557 mlib_d64 hdvl0 = val0 * hval0 + hdvl;
558 mlib_d64 val2 = src2[i * nch];
559
560 hdvl = hdst1[i * dnch];
561 hdvl0 += val1 * hval1;
562 val0 = val1;
563 val1 = val2;
564
565 dst[i * dnch] = hdvl0;
566 }
567
568 }
569 else if (j < m) {
570 const mlib_d64 *src2 = src + 2 * nch;
571 mlib_d64 hval0 = kernel[0];
572 mlib_d64 val0 = src[0];
573 mlib_d64 val1 = src[nch];
574 mlib_d64 hdvl = dst[0];
575
576 for (i = 0; i < n; i++) {
577 mlib_d64 hdvl0 = val0 * hval0 + hdvl;
578 mlib_d64 val2 = src2[i * nch];
579
580 hdvl = hdst1[i * dnch];
581 val0 = val1;
582 val1 = val2;
583
584 dst[i * dnch] = hdvl0;
585 }
586 }
587 }
588
589 /***************************************************************/
590 void mlib_ImageConvMxND642D64_ext(mlib_d64 *dst,
591 const mlib_d64 *src,
592 mlib_s32 n,
593 mlib_s32 nch,
594 mlib_s32 dx_l,
595 mlib_s32 dx_r)
596 {
597 mlib_s32 i;
598 mlib_d64 val = src[0];
599
600 for (i = 0; i < dx_l; i++)
601 dst[i] = val;
602 for (; i < n - dx_r; i++)
603 dst[i] = src[nch * (i - dx_l)];
604 val = dst[n - dx_r - 1];
605 for (; i < n; i++)
606 dst[i] = val;
607 }
608
609 /***************************************************************/
610 mlib_status mlib_convMxNext_d64(mlib_image *dst,
611 const mlib_image *src,
612 const mlib_d64 *kernel,
613 mlib_s32 m,
614 mlib_s32 n,
615 mlib_s32 dx_l,
616 mlib_s32 dx_r,
617 mlib_s32 dy_t,
618 mlib_s32 dy_b,
619 mlib_s32 cmask)
620 {
621 mlib_d64 dspace[1024], *dsa = dspace;
|