  return offset;
}


//=============================================================================

// Float masks come from different places depending on platform.
#ifdef _LP64
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif
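
// Illustrative note (not code from this file): the "signmask" constants are
// the replicated patterns 0x7FFFFFFF / 0x7FFFFFFFFFFFFFFF and the "signflip"
// constants the replicated patterns 0x80000000 / 0x8000000000000000, backing
// the float abs/neg rules, e.g.
//   andps xmm0, [float_signmask()]   // abs: clear the IEEE-754 sign bit
//   xorps xmm0, [float_signflip()]   // neg: toggle the sign bit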

const bool Matcher::match_rule_supported(Opcodes opcode) {
  if (!has_match_rule(opcode))
    return false;

  bool ret_value = true;
  switch (opcode) {
    case Opcodes::Op_PopCountI:
    case Opcodes::Op_PopCountL:
      if (!UsePopCountInstruction)
        ret_value = false;
      break;
    case Opcodes::Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        ret_value = false;
      break;
    case Opcodes::Op_MulVL:
    case Opcodes::Op_MulReductionVL:
      if (VM_Version::supports_avx512dq() == false)
        ret_value = false;
      break;
    case Opcodes::Op_AddReductionVL:
      if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
        ret_value = false;
      break;
    case Opcodes::Op_AddReductionVI:
      if (UseSSE < 3) // requires at least SSE3
        ret_value = false;
      break;
    case Opcodes::Op_MulReductionVI:
      if (UseSSE < 4) // requires at least SSE4
        ret_value = false;
      break;
    case Opcodes::Op_AddReductionVF:
    case Opcodes::Op_AddReductionVD:
    case Opcodes::Op_MulReductionVF:
    case Opcodes::Op_MulReductionVD:
      if (UseSSE < 1) // requires at least SSE
        ret_value = false;
      break;
    case Opcodes::Op_SqrtVD:
      if (UseAVX < 1) // enabled for AVX only
        ret_value = false;
      break;
    case Opcodes::Op_CompareAndSwapL:
#ifdef _LP64
    case Opcodes::Op_CompareAndSwapP:
#endif
      if (!VM_Version::supports_cx8())
        ret_value = false;
      break;
    case Opcodes::Op_CMoveVD:
      if (UseAVX < 1 || UseAVX > 2)
        ret_value = false;
      break;
    case Opcodes::Op_StrIndexOf:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Opcodes::Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Opcodes::Op_OnSpinWait:
      if (VM_Version::supports_on_spin_wait() == false)
        ret_value = false;
      break;
  }

  return ret_value;  // By default match rules are supported.
}
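
// Illustrative sketch (hypothetical caller, not code from this file): the
// platform-independent parts of C2 consult this predicate before creating a
// node that needs CPU support, e.g.
//   if (Matcher::match_rule_supported(Opcodes::Op_PopCountI)) {
//     // safe to emit a PopCountINode; on CPUs without POPCNT
//     // (UsePopCountInstruction false) this fails and the plain Java
//     // bit-twiddling implementation is kept instead
//   }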

const bool Matcher::match_rule_supported_vector(Opcodes opcode, int vlen) {
  // Identify extra cases that we might want to provide match rules for,
  // e.g. Op_ vector nodes and other intrinsics, while guarding with vlen.
  bool ret_value = match_rule_supported(opcode);
  if (ret_value) {
    switch (opcode) {
      case Opcodes::Op_AddVB:
      case Opcodes::Op_SubVB:
        if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Opcodes::Op_URShiftVS:
      case Opcodes::Op_RShiftVS:
      case Opcodes::Op_LShiftVS:
      case Opcodes::Op_MulVS:
      case Opcodes::Op_AddVS:
      case Opcodes::Op_SubVS:
        if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Opcodes::Op_CMoveVD:
        if (vlen != 4)
          ret_value = false;
        break;
    }
  }

  return ret_value;  // By default match rules are supported.
}

const bool Matcher::has_predicated_vectors(void) {
  bool ret_value = false;
  if (UseAVX > 2) {
    ret_value = VM_Version::supports_avx512vl();
  }

  return ret_value;
}

const int Matcher::float_pressure(int default_pressure_threshold) {
  int float_pressure_threshold = default_pressure_threshold;
// ... (elided: remainder of this function and most of the next, whose
//      switch tail follows) ...
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size,max_size);
}
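
// Worked example (assuming 32-byte AVX2 vectors, i.e. vector_width_in_bytes()
// returns 32): max_vector_size(T_INT) = 32/4 = 8 elements and
// max_vector_size(T_BYTE) = 32/1 = 32 elements; min_vector_size() is 4
// elements for byte vectors and 2 otherwise, so no vector narrower than
// 4 bytes (an Opcodes::Op_VecS register) is ever formed.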

// Vector ideal reg corresponding to specified size in bytes
const Opcodes Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Opcodes::Op_VecS;
    case  8: return Opcodes::Op_VecD;
    case 16: return Opcodes::Op_VecX;
    case 32: return Opcodes::Op_VecY;
    case 64: return Opcodes::Op_VecZ;
  }
  ShouldNotReachHere();
  return Opcodes::Op_Node;
}

// Only lowest bits of xmm reg are used for vector shift count.
const Opcodes Matcher::vector_shift_count_ideal_reg(int size) {
  return Opcodes::Op_VecS;
}

// x86 supports misaligned vector store/load.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs
const bool Matcher::pass_original_key_for_aes() {
  return false;
}


const bool Matcher::convi2l_type_required = true;

// Check for shift by small constant as well
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (shift->Opcode() == Opcodes::Op_LShiftX && shift->in(2)->is_Con() &&
      shift->in(2)->get_int() <= 3 &&
      // Are there other uses besides address expressions?
      !matcher->is_visited(shift)) {
    address_visited.set(shift->_idx); // Flag as address_visited
    mstack.push(shift->in(2), Matcher::Visit);
    Node *conv = shift->in(1);
#ifdef _LP64
    // Allow Matcher to match the rule which bypasses
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
    if (conv->Opcode() == Opcodes::Op_ConvI2L &&
        conv->as_Type()->type()->is_long()->_lo >= 0 &&
        // Are there other uses besides address expressions?
        !matcher->is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Matcher::Pre_Visit);
    } else
#endif
      mstack.push(conv, Matcher::Pre_Visit);
    return true;
  }
  return false;
}

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  Node *off = m->in(AddPNode::Offset);
  if (off->is_Con()) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    // ... (elided) ...
    }

    // Clone X+offset as it also folds into most addressing expressions
    mstack.push(off, Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (clone_shift(off, this, mstack, address_visited)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
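
// Illustrative example (schematic ideal subgraph, not taken from a real
// compile): for an int-array access a[i] on LP64, the Offset input of the
// AddP is typically LShiftL(ConvI2L(i), 2). The shift constant is <= 3 and
// a provably non-negative index lets the ConvI2L be bypassed, so the whole
// expression is cloned into each memory user and matched as one addressing
// mode, e.g.
//   mov eax, [rbase + ridx*4 + disp]
// rather than being computed once into a register and shared.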

void Compile::reshape_address(AddPNode* addp) {
}

// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, Opcodes ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  assert(ireg == Opcodes::Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Opcodes::Op_VecS: // copy whole register
    case Opcodes::Op_VecD:
    case Opcodes::Op_VecX:
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Opcodes::Op_VecY:
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Opcodes::Op_VecZ:
      __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Opcodes::Op_VecS:
    case Opcodes::Op_VecD:
    case Opcodes::Op_VecX:
      st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Opcodes::Op_VecY:
    case Opcodes::Op_VecZ:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  return (UseAVX > 2) ? 6 : 4;
}
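
// Encoding-size note for the constants returned above: a reg-reg movdqu is
// 4 bytes as legacy SSE (F3 0F 6F /r) and stays 4 bytes under AVX because
// the 2-byte VEX prefix (C5 xx) replaces the 2-byte F3 0F prefix/escape
// pair; the EVEX prefix (62 xx xx xx) is 4 bytes, giving the 6-byte size
// when UseAVX > 2.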

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, Opcodes ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
      case Opcodes::Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Opcodes::Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Opcodes::Op_VecX:
        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Opcodes::Op_VecY:
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Opcodes::Op_VecZ:
        __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Opcodes::Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Opcodes::Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Opcodes::Op_VecX:
        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Opcodes::Op_VecY:
        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Opcodes::Op_VecZ:
        __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    if (is_load) {
      switch (ireg) {
      case Opcodes::Op_VecS:
        st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Opcodes::Op_VecD:
        st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Opcodes::Op_VecX:
        st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Opcodes::Op_VecY:
      case Opcodes::Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Opcodes::Op_VecS:
        st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Opcodes::Op_VecD:
        st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Opcodes::Op_VecX:
        st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Opcodes::Op_VecY:
      case Opcodes::Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  bool is_single_byte = false;
  int vec_len = 0;
  if ((UseAVX > 2) && (stack_offset != 0)) {
    int tuple_type = Assembler::EVEX_FVM;
    int input_size = Assembler::EVEX_32bit;
    switch (ireg) {
    case Opcodes::Op_VecS:
      tuple_type = Assembler::EVEX_T1S;
      break;
    case Opcodes::Op_VecD:
      tuple_type = Assembler::EVEX_T1S;
      input_size = Assembler::EVEX_64bit;
      break;
    case Opcodes::Op_VecX:
      break;
    case Opcodes::Op_VecY:
      vec_len = 1;
      break;
    case Opcodes::Op_VecZ:
      vec_len = 2;
      break;
    }
    is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
  }
  int offset_size = 0;
  int size = 5;
  if (UseAVX > 2) {
    if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
    } else {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    }
  } else {
    offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
  }
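
// Sizing note on the displacement logic above (based on the EVEX disp8*N
// rule): with EVEX, an 8-bit displacement is implicitly scaled by the tuple
// size, which is what Assembler::query_compressed_disp_byte() is consulted
// for. E.g. an Opcodes::Op_VecD spill (EVEX_T1S tuple, 64-bit input, N = 8)
// with stack_offset == 512 still fits a single displacement byte
// (512/8 = 64), while legacy/VEX encodings need a 4-byte displacement for
// any offset above 127.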