1 // Inferno utils/6l/span.c
2 // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c
4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
6 // Portions Copyright © 1997-1999 Vita Nuova Limited
7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
8 // Portions Copyright © 2004,2006 Bruce Ellis
9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
11 // Portions Copyright © 2009 The Go Authors. All rights reserved.
13 // Permission is hereby granted, free of charge, to any person obtaining a copy
14 // of this software and associated documentation files (the "Software"), to deal
15 // in the Software without restriction, including without limitation the rights
16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 // copies of the Software, and to permit persons to whom the Software is
18 // furnished to do so, subject to the following conditions:
20 // The above copyright notice and this permission notice shall be included in
21 // all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
45 plan9privates *obj.LSym
48 // Instruction layout.
50 // Loop alignment constants:
51 // want to align loop entry to loopAlign-byte boundary,
52 // and willing to insert at most maxLoopPad bytes of NOP to do so.
53 // We define a loop entry as the target of a backward jump.
55 // gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
56 // and it aligns all jump targets, not just backward jump targets.
58 // As of 6/1/2012, the effect of setting maxLoopPad = 10 here
59 // is very slight but negative, so the alignment is disabled by
60 // setting maxLoopPad = 0. The code is here for reference and
61 // for future experiments.
67 // Bit flags that are used to express jump target properties.
69 // branchBackwards marks targets that are located behind.
70 // Used to express jumps to loop headers.
71 branchBackwards = (1 << iota)
72 // branchShort marks branches whose target is close,
73 // with offset in the -128..127 range.
75 // branchLoopHead marks loop entry.
76 // Used to insert padding for misaligned loops.
80 // opBytes holds optab encoding bytes.
81 // Each ytab reserves a fixed number of bytes in this array.
83 // The size should be the minimal number of bytes that
84 // is enough to hold the biggest optab op lines.
85 type opBytes [31]uint8
108 Yu2 // $x, x fits in uint2
109 Yi8 // $x, x fits in int8
110 Yu8 // $x, x fits in uint8
111 Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
122 Yrl32 // Yrl on 32-bit system
168 Yxr0 // X0 only. "<XMM0>" notation in Intel manual.
169 YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
173 YxmEvex // YxrEvex+Ym
174 Yxvm // VSIB vector array; vm32x/vm64x
175 YxvmEvex // Yxvm which permits High-16 X register as index.
176 YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
180 YymEvex // YyrEvex+Ym
181 Yyvm // VSIB vector array; vm32y/vm64y
182 YyvmEvex // Yyvm which permits High-16 Y register as index.
183 YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex
186 Yzvm // VSIB vector array; vm32z/vm64z
188 Yknot0 // K1..K7; write mask
189 Yk // K0..K7; used for KOP
190 Ykm // Yk+Ym; used for KOP
230 Zibm_r // mmx1,mmx2/mem64,imm8
284 Px1 = 1 // symbolic; exact value doesn't matter
285 P32 = 0x32 // 32-bit only
286 Pe = 0x66 // operand escape
287 Pm = 0x0f // 2byte opcode escape
288 Pq = 0xff // both escapes: 66 0f
289 Pb = 0xfe // byte operands
290 Pf2 = 0xf2 // xmm escape 1: f2 0f
291 Pf3 = 0xf3 // xmm escape 2: f3 0f
292 Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
293 Pq3 = 0x67 // xmm escape 3: 66 48 0f
294 Pq4 = 0x68 // xmm escape 4: 66 0F 38
295 Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38
296 Pq5 = 0x6a // xmm escape 5: F3 0F 38
297 Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38
298 Pfw = 0xf4 // Pf3 with Rex.w: f3 48 0f
300 Pw8 = 0x90 // symbolic; exact value doesn't matter
301 Py = 0x80 // defaults to 64-bit mode
302 Py1 = 0x81 // symbolic; exact value doesn't matter
303 Py3 = 0x83 // symbolic; exact value doesn't matter
304 Pavx = 0x84 // symbolic: exact value doesn't matter
306 RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
307 Rxw = 1 << 3 // =1, 64-bit operand size
308 Rxr = 1 << 2 // extend modrm reg
309 Rxx = 1 << 1 // extend sib index
310 Rxb = 1 << 0 // extend modrm r/m, sib base, or opcode reg
314 // Encoding for VEX prefix in tables.
315 // The P, L, and W fields are chosen to match
316 // their eventual locations in the VEX prefix bytes.
318 // Encoding for VEX prefix in tables.
319 // The P, L, and W fields are chosen to match
320 // their eventual locations in the VEX prefix bytes.
322 // Using spare bit to make leading [E]VEX encoding byte different from
323 // 0x0f even if all other VEX fields are 0.
339 // M field - 5 bits, but mostly reserved; we can store up to 3
345 var ycover [Ymax * Ymax]uint8
349 var regrex [MAXREG + 1]int
352 {Zlit, 1, argList{}},
356 {Zpseudo, 0, argList{Ymb, Ytextsize}},
357 {Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
361 {Zpseudo, 0, argList{}},
362 {Zpseudo, 0, argList{Yiauto}},
363 {Zpseudo, 0, argList{Yml}},
364 {Zpseudo, 0, argList{Yrf}},
365 {Zpseudo, 0, argList{Yxr}},
366 {Zpseudo, 0, argList{Yiauto}},
367 {Zpseudo, 0, argList{Yml}},
368 {Zpseudo, 0, argList{Yrf}},
369 {Zpseudo, 1, argList{Yxr}},
372 var yfuncdata = []ytab{
373 {Zpseudo, 0, argList{Yi32, Ym}},
376 var ypcdata = []ytab{
377 {Zpseudo, 0, argList{Yi32, Yi32}},
381 {Zib_, 1, argList{Yi32, Yal}},
382 {Zibo_m, 2, argList{Yi32, Ymb}},
383 {Zr_m, 1, argList{Yrb, Ymb}},
384 {Zm_r, 1, argList{Ymb, Yrb}},
388 {Zibo_m, 2, argList{Yi8, Yml}},
389 {Zil_, 1, argList{Yi32, Yax}},
390 {Zilo_m, 2, argList{Yi32, Yml}},
391 {Zr_m, 1, argList{Yrl, Yml}},
392 {Zm_r, 1, argList{Yml, Yrl}},
396 {Z_rp, 1, argList{Yrl}},
397 {Zo_m, 2, argList{Yml}},
401 {Zo_m, 2, argList{Yml}},
405 {Z_ib, 1, argList{Yal, Yi32}},
406 {Zm_ibo, 2, argList{Ymb, Yi32}},
407 {Zm_r, 1, argList{Ymb, Yrb}},
408 {Zr_m, 1, argList{Yrb, Ymb}},
412 {Zm_ibo, 2, argList{Yml, Yi8}},
413 {Z_il, 1, argList{Yax, Yi32}},
414 {Zm_ilo, 2, argList{Yml, Yi32}},
415 {Zm_r, 1, argList{Yml, Yrl}},
416 {Zr_m, 1, argList{Yrl, Yml}},
420 {Zo_m, 2, argList{Yi1, Ymb}},
421 {Zibo_m, 2, argList{Yu8, Ymb}},
422 {Zo_m, 2, argList{Ycx, Ymb}},
426 {Zo_m, 2, argList{Yi1, Yml}},
427 {Zibo_m, 2, argList{Yu8, Yml}},
428 {Zo_m, 2, argList{Ycl, Yml}},
429 {Zo_m, 2, argList{Ycx, Yml}},
433 {Zil_, 1, argList{Yi32, Yax}},
434 {Zilo_m, 2, argList{Yi32, Yml}},
435 {Zr_m, 1, argList{Yrl, Yml}},
436 {Zm_r, 1, argList{Yml, Yrl}},
440 {Zr_m, 1, argList{Yrb, Ymb}},
441 {Zm_r, 1, argList{Ymb, Yrb}},
442 {Zib_rp, 1, argList{Yi32, Yrb}},
443 {Zibo_m, 2, argList{Yi32, Ymb}},
447 {Zibo_m, 2, argList{Yi8, Yml}},
448 {Zr_m, 1, argList{Yrl, Yml}},
452 {Zr_m, 1, argList{Yrl, Yml}},
453 {Zm_r, 1, argList{Yml, Yrl}},
454 {Zil_rp, 1, argList{Yi32, Yrl}},
455 {Zilo_m, 2, argList{Yi32, Yml}},
456 {Zaut_r, 2, argList{Yiauto, Yrl}},
460 {Zr_m, 1, argList{Yrl, Yml}},
461 {Zm_r, 1, argList{Yml, Yrl}},
462 {Zil_rp, 1, argList{Yi32, Yrl}},
463 {Zilo_m, 2, argList{Yi32, Yml}},
464 {Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
465 {Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
466 {Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
467 {Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
468 {Zaut_r, 2, argList{Yiauto, Yrl}},
472 {Zo_iw, 1, argList{}},
473 {Zo_iw, 1, argList{Yi32}},
477 // valid in 32-bit mode
478 {Zm_r_xm_nr, 1, argList{Ym, Ymr}}, // 0x6f MMX MOVQ (shorter encoding)
479 {Zr_m_xm_nr, 1, argList{Ymr, Ym}}, // 0x7f MMX MOVQ
480 {Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
481 {Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
482 {Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
484 // valid only in 64-bit mode, usually with 64-bit prefix
485 {Zr_m, 1, argList{Yrl, Yml}}, // 0x89
486 {Zm_r, 1, argList{Yml, Yrl}}, // 0x8b
487 {Zilo_m, 2, argList{Ys32, Yrl}}, // 32 bit signed 0xc7,(0)
488 {Ziq_rp, 1, argList{Yi64, Yrl}}, // 0xb8 -- 32/64 bit immediate
489 {Zilo_m, 2, argList{Yi32, Yml}}, // 0xc7,(0)
490 {Zm_r_xm, 1, argList{Ymm, Ymr}}, // 0x6e MMX MOVD
491 {Zr_m_xm, 1, argList{Ymr, Ymm}}, // 0x7e MMX MOVD
492 {Zm_r_xm, 2, argList{Yml, Yxr}}, // Pe, 0x6e MOVD xmm load
493 {Zr_m_xm, 2, argList{Yxr, Yml}}, // Pe, 0x7e MOVD xmm store
494 {Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
498 {Zlitm_r, 3, argList{Ym, Yrl}},
499 {Zlitr_m, 3, argList{Yrl, Ym}},
503 {Zm_r, 1, argList{Ym, Yrl}},
507 {Zr_m, 1, argList{Yrl, Ym}},
511 {Zmb_r, 1, argList{Ymb, Yrl}},
515 {Zm_r, 1, argList{Yml, Yrl}},
519 {Zr_m, 1, argList{Yrl, Yml}},
523 {Zr_m, 1, argList{Yrb, Ymb}},
524 {Zm_r, 1, argList{Ymb, Yrb}},
528 {Zr_m, 1, argList{Yrb, Ymb}},
532 {Z_rp, 1, argList{Yax, Yrl}},
533 {Zrp_, 1, argList{Yrl, Yax}},
534 {Zr_m, 1, argList{Yrl, Yml}},
535 {Zm_r, 1, argList{Yml, Yrl}},
539 {Zm_o, 2, argList{Yml}},
543 {Zm_o, 2, argList{Ymb}},
547 {Zm_o, 2, argList{Yml}},
548 {Zib_rr, 1, argList{Yi8, Yrl}},
549 {Zil_rr, 1, argList{Yi32, Yrl}},
550 {Zm_r, 2, argList{Yml, Yrl}},
554 {Zibm_r, 2, argList{Yi8, Yml, Yrl}},
555 {Zibm_r, 2, argList{Yi32, Yml, Yrl}},
559 {Zbyte, 1, argList{Yi64}},
563 {Zib_, 1, argList{Yi32}},
564 {Zlit, 1, argList{}},
568 {Zib_, 1, argList{Yi32}},
572 {Zrp_, 1, argList{Yrl}},
573 {Zm_o, 2, argList{Ym}},
574 {Zib_, 1, argList{Yi8}},
575 {Zil_, 1, argList{Yi32}},
579 {Z_rp, 1, argList{Yrl}},
580 {Zo_m, 2, argList{Ym}},
583 var ywrfsbase = []ytab{
584 {Zm_o, 2, argList{Yrl}},
587 var yrdrand = []ytab{
588 {Zo_m, 2, argList{Yrl}},
591 var yclflush = []ytab{
592 {Zo_m, 2, argList{Ym}},
596 {Z_rp, 2, argList{Yrl}},
600 {Zo_m, 2, argList{Ymb}},
604 {Zbr, 0, argList{Ybr}},
605 {Zbr, 0, argList{Yi0, Ybr}},
606 {Zbr, 1, argList{Yi1, Ybr}},
610 {Zloop, 1, argList{Ybr}},
614 {Zcallindreg, 0, argList{Yml}},
615 {Zcallindreg, 2, argList{Yrx, Yrx}},
616 {Zcallind, 2, argList{Yindir}},
617 {Zcall, 0, argList{Ybr}},
618 {Zcallcon, 1, argList{Yi32}},
622 {Zcallduff, 1, argList{Yi32}},
626 {Zo_m64, 2, argList{Yml}},
627 {Zjmp, 0, argList{Ybr}},
628 {Zjmpcon, 1, argList{Yi32}},
632 {Zm_o, 2, argList{Ym, Yf0}},
633 {Zo_m, 2, argList{Yf0, Ym}},
634 {Zm_o, 2, argList{Yrf, Yf0}},
635 {Zo_m, 2, argList{Yf0, Yrf}},
639 {Zo_m, 2, argList{Yf0, Ym}},
640 {Zo_m, 2, argList{Yf0, Yrf}},
644 {Zm_o, 2, argList{Ym, Yf0}},
645 {Zo_m, 2, argList{Yf0, Ym}},
649 {Zm_o, 2, argList{Ym, Yf0}},
653 {Zo_m, 2, argList{Yf0, Ym}},
657 {Zm_o, 2, argList{Yrf, Yf0}},
661 {Zm_o, 2, argList{Ym, Yf0}},
662 {Zm_o, 2, argList{Yrf, Yf0}},
663 {Zo_m, 2, argList{Yf0, Yrf}},
667 {Zo_m, 2, argList{Yf0, Yrf}},
668 {Zm_o, 2, argList{Yrf, Yf0}},
672 {Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
676 {Zo_m, 2, argList{Ym}},
677 {Zlit, 1, argList{Yax}},
680 var ysvrs_mo = []ytab{
681 {Zm_o, 2, argList{Ym}},
684 // unaryDst version of "ysvrs_mo".
685 var ysvrs_om = []ytab{
686 {Zo_m, 2, argList{Ym}},
690 {Zm_r_xm, 1, argList{Ymm, Ymr}},
691 {Zm_r_xm, 2, argList{Yxm, Yxr}},
695 {Zm_r_xm, 1, argList{Yxm, Yxr}},
699 {Zm_r, 1, argList{Yxm, Yxr}},
703 {Zm_r_xm, 2, argList{Yxm, Yxr}},
704 {Zm_r_xm, 2, argList{Yxm, Ymr}},
708 {Zm_r_xm, 2, argList{Yxm, Yxr}},
709 {Zm_r_xm, 2, argList{Ymm, Yxr}},
713 {Zm_r_xm, 1, argList{Yxr, Yxr}},
717 {Zr_m_xm, 1, argList{Yxr, Yml}},
721 {Zm_r, 1, argList{Ymr, Ymr}},
725 {Zr_m_xm, 1, argList{Ymr, Yml}},
729 {Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
733 {Zm_r_xm, 1, argList{Yxm, Yxr}},
734 {Zr_m_xm, 1, argList{Yxr, Yxm}},
738 {Zm_r_xm, 1, argList{Yxm, Yrl}},
742 {Zm_r_xm, 1, argList{Yml, Yxr}},
746 {Zm_r_xm, 2, argList{Yxm, Yrl}},
750 {Zm_r_xm, 2, argList{Yml, Yxr}},
754 {Zm_r_xm, 1, argList{Ymm, Ymr}},
755 {Zibo_m_xm, 2, argList{Yi8, Ymr}},
756 {Zm_r_xm, 2, argList{Yxm, Yxr}},
757 {Zibo_m_xm, 3, argList{Yi8, Yxr}},
761 {Zm_r, 1, argList{Yxr, Yrl}},
765 {Zm_r, 1, argList{Ymr, Yxr}},
766 {Zm_r_xm, 1, argList{Yxm, Yxr}},
770 {Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
773 var ymshufb = []ytab{
774 {Zm2_r, 2, argList{Yxm, Yxr}},
777 // It should never have more than 1 entry,
778 // because some optab entries have opcode sequences that
779 // are longer than 2 bytes (zoffset=2 here),
780 // ROUNDPD and ROUNDPS and recently added BLENDPD,
783 {Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
787 {Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
788 {Zibr_m, 2, argList{Yu8, Yxr, Yml}},
792 {Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
796 {Zibm_r, 2, argList{Yu8, Yml, Yxr}},
800 {Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
804 {Zibo_m, 2, argList{Yi8, Yxr}},
808 {Zm_r_xm, 2, argList{Yxr, Yrl}},
809 {Zm_r_xm, 1, argList{Ymr, Yrl}},
812 var ycrc32l = []ytab{
813 {Zlitm_r, 0, argList{Yml, Yrl}},
816 var ycrc32b = []ytab{
817 {Zlitm_r, 0, argList{Ymb, Yrl}},
820 var yprefetch = []ytab{
821 {Zm_o, 2, argList{Ym}},
825 {Zlitm_r, 2, argList{Yxm, Yxr}},
828 var yxbegin = []ytab{
829 {Zjmp, 1, argList{Ybr}},
832 var yxabort = []ytab{
833 {Zib_, 1, argList{Yu8}},
837 {Zm_r, 1, argList{Ym, Yxr}},
840 var ypalignr = []ytab{
841 {Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
844 var ysha256rnds2 = []ytab{
845 {Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
848 var yblendvpd = []ytab{
849 {Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
852 var ymmxmm0f38 = []ytab{
853 {Zlitm_r, 3, argList{Ymm, Ymr}},
854 {Zlitm_r, 5, argList{Yxm, Yxr}},
857 var yextractps = []ytab{
858 {Zibr_m, 2, argList{Yu2, Yxr, Yml}},
861 var ysha1rnds4 = []ytab{
862 {Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
865 // You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
866 // ACRC32, and p.From and p.To as operands (obj.Addr). The linker scans optab
867 // to find the entry with the given p.As and then looks through the ytable for
868 // that instruction (the second field in the optab struct) for a line whose
869 // argList matches the Ytypes of the p.From and p.To operands. The
870 // function oclass computes the specific Ytype of an operand and then the set
871 // of more general Ytypes that it satisfies is implied by the ycover table, set
872 // up in instinit. For example, oclass distinguishes the constants 0 and 1
873 // from the more general 8-bit constants, but instinit says
875 // ycover[Yi0*Ymax+Ys32] = 1
876 // ycover[Yi1*Ymax+Ys32] = 1
877 // ycover[Yi8*Ymax+Ys32] = 1
879 // which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
880 // if that's what an instruction can handle.
882 // In parallel with the scan through the ytable for the appropriate line, there
883 // is a z pointer that starts out pointing at the strange magic byte list in
884 // the Optab struct. With each step past a non-matching ytable line, z
885 // advances by the 2nd entry in the line. When a matching line is found, that
886 // z pointer has the extra data to use in laying down the instruction bytes.
887 // The actual bytes laid down are a function of the 1st entry in the line (that
888 // is, the Ztype) and the z bytes.
890 // For example, let's look at AADDL. The optab line says:
892 // {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
896 // var yaddl = []ytab{
897 // {Zibo_m, 2, argList{Yi8, Yml}},
898 // {Zil_, 1, argList{Yi32, Yax}},
899 // {Zilo_m, 2, argList{Yi32, Yml}},
900 // {Zr_m, 1, argList{Yrl, Yml}},
901 // {Zm_r, 1, argList{Yml, Yrl}},
904 // so there are 5 possible types of ADDL instruction that can be laid down, and
905 // possible states used to lay them down (Ztype and z pointer, assuming z
906 // points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
908 // Yi8, Yml -> Zibo_m, z (0x83, 00)
909 // Yi32, Yax -> Zil_, z+2 (0x05)
910 // Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
911 // Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
912 // Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
914 // The Pconstant in the optab line controls the prefix bytes to emit. That's
915 // relatively straightforward as this program goes.
917 // The switch on yt.zcase in doasm implements the various Z cases. Zibo_m, for
918 // example, is an opcode byte (z[0]) then an asmando (which is some kind of
919 // encoded addressing mode for the Yml arg), and then a single immediate byte.
920 // Zilo_m is the same but a long (32-bit) immediate.
922 // as, ytab, andproto, opcode
924 {obj.AXXX, nil, 0, opBytes{}},
925 {AAAA, ynone, P32, opBytes{0x37}},
926 {AAAD, ynone, P32, opBytes{0xd5, 0x0a}},
927 {AAAM, ynone, P32, opBytes{0xd4, 0x0a}},
928 {AAAS, ynone, P32, opBytes{0x3f}},
929 {AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}},
930 {AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
931 {AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
932 {AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
933 {AADCXL, yml_rl, Pq4, opBytes{0xf6}},
934 {AADCXQ, yml_rl, Pq4w, opBytes{0xf6}},
935 {AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}},
936 {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
937 {AADDPD, yxm, Pq, opBytes{0x58}},
938 {AADDPS, yxm, Pm, opBytes{0x58}},
939 {AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
940 {AADDSD, yxm, Pf2, opBytes{0x58}},
941 {AADDSS, yxm, Pf3, opBytes{0x58}},
942 {AADDSUBPD, yxm, Pq, opBytes{0xd0}},
943 {AADDSUBPS, yxm, Pf2, opBytes{0xd0}},
944 {AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
945 {AADOXL, yml_rl, Pq5, opBytes{0xf6}},
946 {AADOXQ, yml_rl, Pq5w, opBytes{0xf6}},
947 {AADJSP, nil, 0, opBytes{}},
948 {AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}},
949 {AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
950 {AANDNPD, yxm, Pq, opBytes{0x55}},
951 {AANDNPS, yxm, Pm, opBytes{0x55}},
952 {AANDPD, yxm, Pq, opBytes{0x54}},
953 {AANDPS, yxm, Pm, opBytes{0x54}},
954 {AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
955 {AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
956 {AARPL, yrl_ml, P32, opBytes{0x63}},
957 {ABOUNDL, yrl_m, P32, opBytes{0x62}},
958 {ABOUNDW, yrl_m, Pe, opBytes{0x62}},
959 {ABSFL, yml_rl, Pm, opBytes{0xbc}},
960 {ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}},
961 {ABSFW, yml_rl, Pq, opBytes{0xbc}},
962 {ABSRL, yml_rl, Pm, opBytes{0xbd}},
963 {ABSRQ, yml_rl, Pw, opBytes{0x0f, 0xbd}},
964 {ABSRW, yml_rl, Pq, opBytes{0xbd}},
965 {ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}},
966 {ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}},
967 {ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}},
968 {ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}},
969 {ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}},
970 {ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}},
971 {ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}},
972 {ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}},
973 {ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}},
974 {ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}},
975 {ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}},
976 {ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}},
977 {ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}},
978 {ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}},
979 {ABYTE, ybyte, Px, opBytes{1}},
980 {obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}},
981 {ACBW, ynone, Pe, opBytes{0x98}},
982 {ACDQ, ynone, Px, opBytes{0x99}},
983 {ACDQE, ynone, Pw, opBytes{0x98}},
984 {ACLAC, ynone, Pm, opBytes{01, 0xca}},
985 {ACLC, ynone, Px, opBytes{0xf8}},
986 {ACLD, ynone, Px, opBytes{0xfc}},
987 {ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}},
988 {ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}},
989 {ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}},
990 {ACLI, ynone, Px, opBytes{0xfa}},
991 {ACLTS, ynone, Pm, opBytes{0x06}},
992 {ACLWB, yclflush, Pq, opBytes{0xae, 06}},
993 {ACMC, ynone, Px, opBytes{0xf5}},
994 {ACMOVLCC, yml_rl, Pm, opBytes{0x43}},
995 {ACMOVLCS, yml_rl, Pm, opBytes{0x42}},
996 {ACMOVLEQ, yml_rl, Pm, opBytes{0x44}},
997 {ACMOVLGE, yml_rl, Pm, opBytes{0x4d}},
998 {ACMOVLGT, yml_rl, Pm, opBytes{0x4f}},
999 {ACMOVLHI, yml_rl, Pm, opBytes{0x47}},
1000 {ACMOVLLE, yml_rl, Pm, opBytes{0x4e}},
1001 {ACMOVLLS, yml_rl, Pm, opBytes{0x46}},
1002 {ACMOVLLT, yml_rl, Pm, opBytes{0x4c}},
1003 {ACMOVLMI, yml_rl, Pm, opBytes{0x48}},
1004 {ACMOVLNE, yml_rl, Pm, opBytes{0x45}},
1005 {ACMOVLOC, yml_rl, Pm, opBytes{0x41}},
1006 {ACMOVLOS, yml_rl, Pm, opBytes{0x40}},
1007 {ACMOVLPC, yml_rl, Pm, opBytes{0x4b}},
1008 {ACMOVLPL, yml_rl, Pm, opBytes{0x49}},
1009 {ACMOVLPS, yml_rl, Pm, opBytes{0x4a}},
1010 {ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}},
1011 {ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}},
1012 {ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}},
1013 {ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}},
1014 {ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}},
1015 {ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}},
1016 {ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}},
1017 {ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}},
1018 {ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}},
1019 {ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}},
1020 {ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}},
1021 {ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}},
1022 {ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}},
1023 {ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}},
1024 {ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}},
1025 {ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}},
1026 {ACMOVWCC, yml_rl, Pq, opBytes{0x43}},
1027 {ACMOVWCS, yml_rl, Pq, opBytes{0x42}},
1028 {ACMOVWEQ, yml_rl, Pq, opBytes{0x44}},
1029 {ACMOVWGE, yml_rl, Pq, opBytes{0x4d}},
1030 {ACMOVWGT, yml_rl, Pq, opBytes{0x4f}},
1031 {ACMOVWHI, yml_rl, Pq, opBytes{0x47}},
1032 {ACMOVWLE, yml_rl, Pq, opBytes{0x4e}},
1033 {ACMOVWLS, yml_rl, Pq, opBytes{0x46}},
1034 {ACMOVWLT, yml_rl, Pq, opBytes{0x4c}},
1035 {ACMOVWMI, yml_rl, Pq, opBytes{0x48}},
1036 {ACMOVWNE, yml_rl, Pq, opBytes{0x45}},
1037 {ACMOVWOC, yml_rl, Pq, opBytes{0x41}},
1038 {ACMOVWOS, yml_rl, Pq, opBytes{0x40}},
1039 {ACMOVWPC, yml_rl, Pq, opBytes{0x4b}},
1040 {ACMOVWPL, yml_rl, Pq, opBytes{0x49}},
1041 {ACMOVWPS, yml_rl, Pq, opBytes{0x4a}},
1042 {ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}},
1043 {ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
1044 {ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}},
1045 {ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}},
1046 {ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
1047 {ACMPSB, ynone, Pb, opBytes{0xa6}},
1048 {ACMPSD, yxcmpi, Px, opBytes{Pf2, 0xc2}},
1049 {ACMPSL, ynone, Px, opBytes{0xa7}},
1050 {ACMPSQ, ynone, Pw, opBytes{0xa7}},
1051 {ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}},
1052 {ACMPSW, ynone, Pe, opBytes{0xa7}},
1053 {ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
1054 {ACOMISD, yxm, Pe, opBytes{0x2f}},
1055 {ACOMISS, yxm, Pm, opBytes{0x2f}},
1056 {ACPUID, ynone, Pm, opBytes{0xa2}},
1057 {ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}},
1058 {ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}},
1059 {ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}},
1060 {ACVTPD2PS, yxm, Pe, opBytes{0x5a}},
1061 {ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}},
1062 {ACVTPS2PD, yxm, Pm, opBytes{0x5a}},
1063 {ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}},
1064 {ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}},
1065 {ACVTSD2SS, yxm, Pf2, opBytes{0x5a}},
1066 {ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}},
1067 {ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}},
1068 {ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}},
1069 {ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}},
1070 {ACVTSS2SD, yxm, Pf3, opBytes{0x5a}},
1071 {ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}},
1072 {ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}},
1073 {ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}},
1074 {ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}},
1075 {ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}},
1076 {ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}},
1077 {ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}},
1078 {ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}},
1079 {ACWD, ynone, Pe, opBytes{0x99}},
1080 {ACWDE, ynone, Px, opBytes{0x98}},
1081 {ACQO, ynone, Pw, opBytes{0x99}},
1082 {ADAA, ynone, P32, opBytes{0x27}},
1083 {ADAS, ynone, P32, opBytes{0x2f}},
1084 {ADECB, yscond, Pb, opBytes{0xfe, 01}},
1085 {ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}},
1086 {ADECQ, yincq, Pw, opBytes{0xff, 01}},
1087 {ADECW, yincq, Pe, opBytes{0xff, 01}},
1088 {ADIVB, ydivb, Pb, opBytes{0xf6, 06}},
1089 {ADIVL, ydivl, Px, opBytes{0xf7, 06}},
1090 {ADIVPD, yxm, Pe, opBytes{0x5e}},
1091 {ADIVPS, yxm, Pm, opBytes{0x5e}},
1092 {ADIVQ, ydivl, Pw, opBytes{0xf7, 06}},
1093 {ADIVSD, yxm, Pf2, opBytes{0x5e}},
1094 {ADIVSS, yxm, Pf3, opBytes{0x5e}},
1095 {ADIVW, ydivl, Pe, opBytes{0xf7, 06}},
1096 {ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}},
1097 {ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}},
1098 {AEMMS, ynone, Pm, opBytes{0x77}},
1099 {AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}},
1100 {AENTER, nil, 0, opBytes{}}, // botch
1101 {AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}},
1102 {AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}},
1103 {AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
1104 {AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
1105 {AHLT, ynone, Px, opBytes{0xf4}},
1106 {AIDIVB, ydivb, Pb, opBytes{0xf6, 07}},
1107 {AIDIVL, ydivl, Px, opBytes{0xf7, 07}},
1108 {AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}},
1109 {AIDIVW, ydivl, Pe, opBytes{0xf7, 07}},
1110 {AIMULB, ydivb, Pb, opBytes{0xf6, 05}},
1111 {AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
1112 {AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
1113 {AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
1114 {AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}},
1115 {AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}},
1116 {AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}},
1117 {AINB, yin, Pb, opBytes{0xe4, 0xec}},
1118 {AINW, yin, Pe, opBytes{0xe5, 0xed}},
1119 {AINL, yin, Px, opBytes{0xe5, 0xed}},
1120 {AINCB, yscond, Pb, opBytes{0xfe, 00}},
1121 {AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}},
1122 {AINCQ, yincq, Pw, opBytes{0xff, 00}},
1123 {AINCW, yincq, Pe, opBytes{0xff, 00}},
1124 {AINSB, ynone, Pb, opBytes{0x6c}},
1125 {AINSL, ynone, Px, opBytes{0x6d}},
1126 {AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}},
1127 {AINSW, ynone, Pe, opBytes{0x6d}},
1128 {AICEBP, ynone, Px, opBytes{0xf1}},
1129 {AINT, yint, Px, opBytes{0xcd}},
1130 {AINTO, ynone, P32, opBytes{0xce}},
1131 {AIRETL, ynone, Px, opBytes{0xcf}},
1132 {AIRETQ, ynone, Pw, opBytes{0xcf}},
1133 {AIRETW, ynone, Pe, opBytes{0xcf}},
1134 {AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}},
1135 {AJCS, yjcond, Px, opBytes{0x72, 0x82}},
1136 {AJCXZL, yloop, Px, opBytes{0xe3}},
1137 {AJCXZW, yloop, Px, opBytes{0xe3}},
1138 {AJCXZQ, yloop, Px, opBytes{0xe3}},
1139 {AJEQ, yjcond, Px, opBytes{0x74, 0x84}},
1140 {AJGE, yjcond, Px, opBytes{0x7d, 0x8d}},
1141 {AJGT, yjcond, Px, opBytes{0x7f, 0x8f}},
1142 {AJHI, yjcond, Px, opBytes{0x77, 0x87}},
1143 {AJLE, yjcond, Px, opBytes{0x7e, 0x8e}},
1144 {AJLS, yjcond, Px, opBytes{0x76, 0x86}},
1145 {AJLT, yjcond, Px, opBytes{0x7c, 0x8c}},
1146 {AJMI, yjcond, Px, opBytes{0x78, 0x88}},
1147 {obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}},
1148 {AJNE, yjcond, Px, opBytes{0x75, 0x85}},
1149 {AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}},
1150 {AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}},
1151 {AJPC, yjcond, Px, opBytes{0x7b, 0x8b}},
1152 {AJPL, yjcond, Px, opBytes{0x79, 0x89}},
1153 {AJPS, yjcond, Px, opBytes{0x7a, 0x8a}},
1154 {AHADDPD, yxm, Pq, opBytes{0x7c}},
1155 {AHADDPS, yxm, Pf2, opBytes{0x7c}},
1156 {AHSUBPD, yxm, Pq, opBytes{0x7d}},
1157 {AHSUBPS, yxm, Pf2, opBytes{0x7d}},
1158 {ALAHF, ynone, Px, opBytes{0x9f}},
1159 {ALARL, yml_rl, Pm, opBytes{0x02}},
1160 {ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}},
1161 {ALARW, yml_rl, Pq, opBytes{0x02}},
1162 {ALDDQU, ylddqu, Pf2, opBytes{0xf0}},
1163 {ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}},
1164 {ALEAL, ym_rl, Px, opBytes{0x8d}},
1165 {ALEAQ, ym_rl, Pw, opBytes{0x8d}},
1166 {ALEAVEL, ynone, P32, opBytes{0xc9}},
1167 {ALEAVEQ, ynone, Py, opBytes{0xc9}},
1168 {ALEAVEW, ynone, Pe, opBytes{0xc9}},
1169 {ALEAW, ym_rl, Pe, opBytes{0x8d}},
1170 {ALOCK, ynone, Px, opBytes{0xf0}},
1171 {ALODSB, ynone, Pb, opBytes{0xac}},
1172 {ALODSL, ynone, Px, opBytes{0xad}},
1173 {ALODSQ, ynone, Pw, opBytes{0xad}},
1174 {ALODSW, ynone, Pe, opBytes{0xad}},
1175 {ALONG, ybyte, Px, opBytes{4}},
1176 {ALOOP, yloop, Px, opBytes{0xe2}},
1177 {ALOOPEQ, yloop, Px, opBytes{0xe1}},
1178 {ALOOPNE, yloop, Px, opBytes{0xe0}},
1179 {ALTR, ydivl, Pm, opBytes{0x00, 03}},
1180 {ALZCNTL, yml_rl, Pf3, opBytes{0xbd}},
1181 {ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}},
1182 {ALZCNTW, yml_rl, Pef3, opBytes{0xbd}},
1183 {ALSLL, yml_rl, Pm, opBytes{0x03}},
1184 {ALSLW, yml_rl, Pq, opBytes{0x03}},
1185 {ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}},
1186 {AMASKMOVOU, yxr, Pe, opBytes{0xf7}},
1187 {AMASKMOVQ, ymr, Pm, opBytes{0xf7}},
1188 {AMAXPD, yxm, Pe, opBytes{0x5f}},
1189 {AMAXPS, yxm, Pm, opBytes{0x5f}},
1190 {AMAXSD, yxm, Pf2, opBytes{0x5f}},
1191 {AMAXSS, yxm, Pf3, opBytes{0x5f}},
1192 {AMINPD, yxm, Pe, opBytes{0x5d}},
1193 {AMINPS, yxm, Pm, opBytes{0x5d}},
1194 {AMINSD, yxm, Pf2, opBytes{0x5d}},
1195 {AMINSS, yxm, Pf3, opBytes{0x5d}},
1196 {AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}},
1197 {AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}},
1198 {AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}},
1199 {AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}},
1200 {AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}},
1201 {AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}},
1202 {AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}},
1203 {AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}},
1204 {AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}},
1205 {AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}},
1206 {AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}},
1207 {AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}},
1208 {AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}},
1209 {AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}},
1210 {AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}},
1211 {AMOVHLPS, yxr, Pm, opBytes{0x12}},
1212 {AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}},
1213 {AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}},
1214 {AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
1215 {AMOVLHPS, yxr, Pm, opBytes{0x16}},
1216 {AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}},
1217 {AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}},
1218 {AMOVLQSX, yml_rl, Pw, opBytes{0x63}},
1219 {AMOVLQZX, yml_rl, Px, opBytes{0x8b}},
1220 {AMOVMSKPD, yxrrl, Pq, opBytes{0x50}},
1221 {AMOVMSKPS, yxrrl, Pm, opBytes{0x50}},
1222 {AMOVNTO, yxr_ml, Pe, opBytes{0xe7}},
1223 {AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}},
1224 {AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}},
1225 {AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}},
1226 {AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}},
1227 {AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
1228 {AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}},
1229 {AMOVSB, ynone, Pb, opBytes{0xa4}},
1230 {AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}},
1231 {AMOVSL, ynone, Px, opBytes{0xa5}},
1232 {AMOVSQ, ynone, Pw, opBytes{0xa5}},
1233 {AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}},
1234 {AMOVSW, ynone, Pe, opBytes{0xa5}},
1235 {AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}},
1236 {AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}},
1237 {AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
1238 {AMOVWLSX, yml_rl, Pm, opBytes{0xbf}},
1239 {AMOVWLZX, yml_rl, Pm, opBytes{0xb7}},
1240 {AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}},
1241 {AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}},
1242 {AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}},
1243 {AMULB, ydivb, Pb, opBytes{0xf6, 04}},
1244 {AMULL, ydivl, Px, opBytes{0xf7, 04}},
1245 {AMULPD, yxm, Pe, opBytes{0x59}},
1246 {AMULPS, yxm, Ym, opBytes{0x59}},
1247 {AMULQ, ydivl, Pw, opBytes{0xf7, 04}},
1248 {AMULSD, yxm, Pf2, opBytes{0x59}},
1249 {AMULSS, yxm, Pf3, opBytes{0x59}},
1250 {AMULW, ydivl, Pe, opBytes{0xf7, 04}},
1251 {ANEGB, yscond, Pb, opBytes{0xf6, 03}},
1252 {ANEGL, yscond, Px, opBytes{0xf7, 03}},
1253 {ANEGQ, yscond, Pw, opBytes{0xf7, 03}},
1254 {ANEGW, yscond, Pe, opBytes{0xf7, 03}},
1255 {obj.ANOP, ynop, Px, opBytes{0, 0}},
1256 {ANOTB, yscond, Pb, opBytes{0xf6, 02}},
1257 {ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
1258 {ANOTQ, yscond, Pw, opBytes{0xf7, 02}},
1259 {ANOTW, yscond, Pe, opBytes{0xf7, 02}},
1260 {AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}},
1261 {AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
1262 {AORPD, yxm, Pq, opBytes{0x56}},
1263 {AORPS, yxm, Pm, opBytes{0x56}},
1264 {AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
1265 {AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
1266 {AOUTB, yin, Pb, opBytes{0xe6, 0xee}},
1267 {AOUTL, yin, Px, opBytes{0xe7, 0xef}},
1268 {AOUTW, yin, Pe, opBytes{0xe7, 0xef}},
1269 {AOUTSB, ynone, Pb, opBytes{0x6e}},
1270 {AOUTSL, ynone, Px, opBytes{0x6f}},
1271 {AOUTSW, ynone, Pe, opBytes{0x6f}},
1272 {APABSB, yxm_q4, Pq4, opBytes{0x1c}},
1273 {APABSD, yxm_q4, Pq4, opBytes{0x1e}},
1274 {APABSW, yxm_q4, Pq4, opBytes{0x1d}},
1275 {APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}},
1276 {APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}},
1277 {APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}},
1278 {APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}},
1279 {APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}},
1280 {APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}},
1281 {APADDQ, yxm, Pe, opBytes{0xd4}},
1282 {APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}},
1283 {APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}},
1284 {APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}},
1285 {APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}},
1286 {APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}},
1287 {APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}},
1288 {APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}},
1289 {APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}},
1290 {APAUSE, ynone, Px, opBytes{0xf3, 0x90}},
1291 {APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}},
1292 {APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}},
1293 {APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}},
1294 {APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}},
1295 {APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}},
1296 {APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}},
1297 {APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}},
1298 {APCMPGTB, ymm, Py1, opBytes{0x64, Pe, 0x64}},
1299 {APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}},
1300 {APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}},
1301 {APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}},
1302 {APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}},
1303 {APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}},
1304 {APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}},
1305 {APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}},
1306 {APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}},
1307 {APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}},
1308 {APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
1309 {APHADDSW, yxm_q4, Pq4, opBytes{0x03}},
1310 {APHADDW, yxm_q4, Pq4, opBytes{0x01}},
1311 {APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}},
1312 {APHSUBD, yxm_q4, Pq4, opBytes{0x06}},
1313 {APHSUBSW, yxm_q4, Pq4, opBytes{0x07}},
1314 {APHSUBW, yxm_q4, Pq4, opBytes{0x05}},
1315 {APINSRW, yinsrw, Pq, opBytes{0xc4, 00}},
1316 {APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}},
1317 {APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}},
1318 {APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}},
1319 {APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}},
1320 {APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}},
1321 {APMAXSB, yxm_q4, Pq4, opBytes{0x3c}},
1322 {APMAXSD, yxm_q4, Pq4, opBytes{0x3d}},
1323 {APMAXSW, yxm, Pe, opBytes{0xee}},
1324 {APMAXUB, yxm, Pe, opBytes{0xde}},
1325 {APMAXUD, yxm_q4, Pq4, opBytes{0x3f}},
1326 {APMAXUW, yxm_q4, Pq4, opBytes{0x3e}},
1327 {APMINSB, yxm_q4, Pq4, opBytes{0x38}},
1328 {APMINSD, yxm_q4, Pq4, opBytes{0x39}},
1329 {APMINSW, yxm, Pe, opBytes{0xea}},
1330 {APMINUB, yxm, Pe, opBytes{0xda}},
1331 {APMINUD, yxm_q4, Pq4, opBytes{0x3b}},
1332 {APMINUW, yxm_q4, Pq4, opBytes{0x3a}},
1333 {APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}},
1334 {APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}},
1335 {APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}},
1336 {APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}},
1337 {APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}},
1338 {APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}},
1339 {APMOVSXWQ, yxm_q4, Pq4, opBytes{0x24}},
1340 {APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}},
1341 {APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}},
1342 {APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}},
1343 {APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}},
1344 {APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}},
1345 {APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}},
1346 {APMULDQ, yxm_q4, Pq4, opBytes{0x28}},
1347 {APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}},
1348 {APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}},
1349 {APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}},
1350 {APMULLD, yxm_q4, Pq4, opBytes{0x40}},
1351 {APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}},
1352 {APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}},
1353 {APOPAL, ynone, P32, opBytes{0x61}},
1354 {APOPAW, ynone, Pe, opBytes{0x61}},
1355 {APOPCNTW, yml_rl, Pef3, opBytes{0xb8}},
1356 {APOPCNTL, yml_rl, Pf3, opBytes{0xb8}},
1357 {APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}},
1358 {APOPFL, ynone, P32, opBytes{0x9d}},
1359 {APOPFQ, ynone, Py, opBytes{0x9d}},
1360 {APOPFW, ynone, Pe, opBytes{0x9d}},
1361 {APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}},
1362 {APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}},
1363 {APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}},
1364 {APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}},
1365 {APSADBW, yxm, Pq, opBytes{0xf6}},
1366 {APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}},
1367 {APSHUFL, yxshuf, Pq, opBytes{0x70, 00}},
1368 {APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}},
1369 {APSHUFW, ymshuf, Pm, opBytes{0x70, 00}},
1370 {APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}},
1371 {APSIGNB, yxm_q4, Pq4, opBytes{0x08}},
1372 {APSIGND, yxm_q4, Pq4, opBytes{0x0a}},
1373 {APSIGNW, yxm_q4, Pq4, opBytes{0x09}},
1374 {APSLLO, ypsdq, Pq, opBytes{0x73, 07}},
1375 {APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
1376 {APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
1377 {APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
1378 {APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
1379 {APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
1380 {APSRLO, ypsdq, Pq, opBytes{0x73, 03}},
1381 {APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
1382 {APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
1383 {APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
1384 {APSUBB, yxm, Pe, opBytes{0xf8}},
1385 {APSUBL, yxm, Pe, opBytes{0xfa}},
1386 {APSUBQ, yxm, Pe, opBytes{0xfb}},
1387 {APSUBSB, yxm, Pe, opBytes{0xe8}},
1388 {APSUBSW, yxm, Pe, opBytes{0xe9}},
1389 {APSUBUSB, yxm, Pe, opBytes{0xd8}},
1390 {APSUBUSW, yxm, Pe, opBytes{0xd9}},
1391 {APSUBW, yxm, Pe, opBytes{0xf9}},
1392 {APTEST, yxm_q4, Pq4, opBytes{0x17}},
1393 {APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}},
1394 {APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}},
1395 {APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}},
1396 {APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}},
1397 {APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}},
1398 {APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}},
1399 {APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}},
1400 {APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}},
1401 {APUSHAL, ynone, P32, opBytes{0x60}},
1402 {APUSHAW, ynone, Pe, opBytes{0x60}},
1403 {APUSHFL, ynone, P32, opBytes{0x9c}},
1404 {APUSHFQ, ynone, Py, opBytes{0x9c}},
1405 {APUSHFW, ynone, Pe, opBytes{0x9c}},
1406 {APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
1407 {APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
1408 {APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
1409 {APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}},
1410 {AQUAD, ybyte, Px, opBytes{8}},
1411 {ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}},
1412 {ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
1413 {ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
1414 {ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
1415 {ARCPPS, yxm, Pm, opBytes{0x53}},
1416 {ARCPSS, yxm, Pf3, opBytes{0x53}},
1417 {ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}},
1418 {ARCRL, yshl, Px, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
1419 {ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
1420 {ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
1421 {AREP, ynone, Px, opBytes{0xf3}},
1422 {AREPN, ynone, Px, opBytes{0xf2}},
1423 {obj.ARET, ynone, Px, opBytes{0xc3}},
1424 {ARETFW, yret, Pe, opBytes{0xcb, 0xca}},
1425 {ARETFL, yret, Px, opBytes{0xcb, 0xca}},
1426 {ARETFQ, yret, Pw, opBytes{0xcb, 0xca}},
1427 {AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}},
1428 {AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
1429 {AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
1430 {AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
1431 {ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}},
1432 {ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
1433 {ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
1434 {ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
1435 {ARSQRTPS, yxm, Pm, opBytes{0x52}},
1436 {ARSQRTSS, yxm, Pf3, opBytes{0x52}},
1437 {ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL
1438 {ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
1439 {ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1440 {ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1441 {ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1442 {ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}},
1443 {ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
1444 {ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
1445 {ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
1446 {ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}},
1447 {ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
1448 {ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
1449 {ASBBW, yaddl, Pe, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
1450 {ASCASB, ynone, Pb, opBytes{0xae}},
1451 {ASCASL, ynone, Px, opBytes{0xaf}},
1452 {ASCASQ, ynone, Pw, opBytes{0xaf}},
1453 {ASCASW, ynone, Pe, opBytes{0xaf}},
1454 {ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}},
1455 {ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}},
1456 {ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}},
1457 {ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}},
1458 {ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}},
1459 {ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}},
1460 {ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}},
1461 {ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}},
1462 {ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}},
1463 {ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}},
1464 {ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}},
1465 {ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}},
1466 {ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}},
1467 {ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}},
1468 {ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}},
1469 {ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}},
1470 {ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
1471 {ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1472 {ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1473 {ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1474 {ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}},
1475 {ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
1476 {ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
1477 {ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
1478 {ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}},
1479 {ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}},
1480 {ASQRTPD, yxm, Pe, opBytes{0x51}},
1481 {ASQRTPS, yxm, Pm, opBytes{0x51}},
1482 {ASQRTSD, yxm, Pf2, opBytes{0x51}},
1483 {ASQRTSS, yxm, Pf3, opBytes{0x51}},
1484 {ASTC, ynone, Px, opBytes{0xf9}},
1485 {ASTD, ynone, Px, opBytes{0xfd}},
1486 {ASTI, ynone, Px, opBytes{0xfb}},
1487 {ASTMXCSR, ysvrs_om, Pm, opBytes{0xae, 03, 0xae, 03}},
1488 {ASTOSB, ynone, Pb, opBytes{0xaa}},
1489 {ASTOSL, ynone, Px, opBytes{0xab}},
1490 {ASTOSQ, ynone, Pw, opBytes{0xab}},
1491 {ASTOSW, ynone, Pe, opBytes{0xab}},
1492 {ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}},
1493 {ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
1494 {ASUBPD, yxm, Pe, opBytes{0x5c}},
1495 {ASUBPS, yxm, Pm, opBytes{0x5c}},
1496 {ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
1497 {ASUBSD, yxm, Pf2, opBytes{0x5c}},
1498 {ASUBSS, yxm, Pf3, opBytes{0x5c}},
1499 {ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
1500 {ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}},
1501 {ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall
1502 {ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}},
1503 {ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
1504 {ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
1505 {ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
1506 {ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}},
1507 {obj.ATEXT, ytext, Px, opBytes{}},
1508 {AUCOMISD, yxm, Pe, opBytes{0x2e}},
1509 {AUCOMISS, yxm, Pm, opBytes{0x2e}},
1510 {AUNPCKHPD, yxm, Pe, opBytes{0x15}},
1511 {AUNPCKHPS, yxm, Pm, opBytes{0x15}},
1512 {AUNPCKLPD, yxm, Pe, opBytes{0x14}},
1513 {AUNPCKLPS, yxm, Pm, opBytes{0x14}},
1514 {AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}},
1515 {AVERR, ydivl, Pm, opBytes{0x00, 04}},
1516 {AVERW, ydivl, Pm, opBytes{0x00, 05}},
1517 {AWAIT, ynone, Px, opBytes{0x9b}},
1518 {AWORD, ybyte, Px, opBytes{2}},
1519 {AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}},
1520 {AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}},
1521 {AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}},
1522 {AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}},
1523 {AXLAT, ynone, Px, opBytes{0xd7}},
1524 {AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}},
1525 {AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
1526 {AXORPD, yxm, Pe, opBytes{0x57}},
1527 {AXORPS, yxm, Pm, opBytes{0x57}},
1528 {AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
1529 {AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
1530 {AFMOVB, yfmvx, Px, opBytes{0xdf, 04}},
1531 {AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}},
1532 {AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
1533 {AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}},
1534 {AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}},
1535 {AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}},
1536 {AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}},
1537 {AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}},
1538 {AFMOVV, yfmvx, Px, opBytes{0xdf, 05}},
1539 {AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}},
1540 {AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}},
1541 {AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}},
1542 {AFMOVX, yfmvx, Px, opBytes{0xdb, 05}},
1543 {AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}},
1544 {AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}},
1545 {AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}},
1546 {AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}},
1547 {AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}},
1548 {AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}},
1549 {AFCMOVB, yfcmv, Px, opBytes{0xda, 00}},
1550 {AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}},
1551 {AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}},
1552 {AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}},
1553 {AFCMOVE, yfcmv, Px, opBytes{0xda, 01}},
1554 {AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}},
1555 {AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}},
1556 {AFCMOVU, yfcmv, Px, opBytes{0xda, 03}},
1557 {AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}},
1558 {AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}}, // botch
1559 {AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch
1560 {AFCOMDPP, ycompp, Px, opBytes{0xde, 03}},
1561 {AFCOMF, yfmvx, Px, opBytes{0xd8, 02}},
1562 {AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}},
1563 {AFCOMI, yfcmv, Px, opBytes{0xdb, 06}},
1564 {AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}},
1565 {AFCOML, yfmvx, Px, opBytes{0xda, 02}},
1566 {AFCOMLP, yfmvx, Px, opBytes{0xda, 03}},
1567 {AFCOMW, yfmvx, Px, opBytes{0xde, 02}},
1568 {AFCOMWP, yfmvx, Px, opBytes{0xde, 03}},
1569 {AFUCOM, ycompp, Px, opBytes{0xdd, 04}},
1570 {AFUCOMI, ycompp, Px, opBytes{0xdb, 05}},
1571 {AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}},
1572 {AFUCOMP, ycompp, Px, opBytes{0xdd, 05}},
1573 {AFUCOMPP, ycompp, Px, opBytes{0xda, 13}},
1574 {AFADDDP, ycompp, Px, opBytes{0xde, 00}},
1575 {AFADDW, yfmvx, Px, opBytes{0xde, 00}},
1576 {AFADDL, yfmvx, Px, opBytes{0xda, 00}},
1577 {AFADDF, yfmvx, Px, opBytes{0xd8, 00}},
1578 {AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}},
1579 {AFMULDP, ycompp, Px, opBytes{0xde, 01}},
1580 {AFMULW, yfmvx, Px, opBytes{0xde, 01}},
1581 {AFMULL, yfmvx, Px, opBytes{0xda, 01}},
1582 {AFMULF, yfmvx, Px, opBytes{0xd8, 01}},
1583 {AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}},
1584 {AFSUBDP, ycompp, Px, opBytes{0xde, 05}},
1585 {AFSUBW, yfmvx, Px, opBytes{0xde, 04}},
1586 {AFSUBL, yfmvx, Px, opBytes{0xda, 04}},
1587 {AFSUBF, yfmvx, Px, opBytes{0xd8, 04}},
1588 {AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}},
1589 {AFSUBRDP, ycompp, Px, opBytes{0xde, 04}},
1590 {AFSUBRW, yfmvx, Px, opBytes{0xde, 05}},
1591 {AFSUBRL, yfmvx, Px, opBytes{0xda, 05}},
1592 {AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}},
1593 {AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}},
1594 {AFDIVDP, ycompp, Px, opBytes{0xde, 07}},
1595 {AFDIVW, yfmvx, Px, opBytes{0xde, 06}},
1596 {AFDIVL, yfmvx, Px, opBytes{0xda, 06}},
1597 {AFDIVF, yfmvx, Px, opBytes{0xd8, 06}},
1598 {AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}},
1599 {AFDIVRDP, ycompp, Px, opBytes{0xde, 06}},
1600 {AFDIVRW, yfmvx, Px, opBytes{0xde, 07}},
1601 {AFDIVRL, yfmvx, Px, opBytes{0xda, 07}},
1602 {AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}},
1603 {AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}},
1604 {AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}},
1605 {AFFREE, nil, 0, opBytes{}},
1606 {AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}},
1607 {AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}},
1608 {AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}},
1609 {AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}},
1610 {AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}},
1611 {AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}},
1612 {AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}},
1613 {AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}},
1614 {AFABS, ynone, Px, opBytes{0xd9, 0xe1}},
1615 {AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}},
1616 {AFBSTP, yclflush, Px, opBytes{0xdf, 06}},
1617 {AFCHS, ynone, Px, opBytes{0xd9, 0xe0}},
1618 {AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}},
1619 {AFCOS, ynone, Px, opBytes{0xd9, 0xff}},
1620 {AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}},
1621 {AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}},
1622 {AFINIT, ynone, Px, opBytes{0xdb, 0xe3}},
1623 {AFLD1, ynone, Px, opBytes{0xd9, 0xe8}},
1624 {AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}},
1625 {AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}},
1626 {AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}},
1627 {AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}},
1628 {AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}},
1629 {AFLDZ, ynone, Px, opBytes{0xd9, 0xee}},
1630 {AFNOP, ynone, Px, opBytes{0xd9, 0xd0}},
1631 {AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}},
1632 {AFPREM, ynone, Px, opBytes{0xd9, 0xf8}},
1633 {AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}},
1634 {AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}},
1635 {AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}},
1636 {AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}},
1637 {AFSIN, ynone, Px, opBytes{0xd9, 0xfe}},
1638 {AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}},
1639 {AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}},
1640 {AFTST, ynone, Px, opBytes{0xd9, 0xe4}},
1641 {AFXAM, ynone, Px, opBytes{0xd9, 0xe5}},
1642 {AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}},
1643 {AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}},
1644 {AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}},
1645 {ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}},
1646 {ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}},
1647 {ACMPXCHGW, yrl_ml, Pe, opBytes{0x0f, 0xb1}},
1648 {ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}},
1649 {ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}},
1650 {ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}},
1651 {AINVD, ynone, Pm, opBytes{0x08}},
1652 {AINVLPG, ydivb, Pm, opBytes{0x01, 07}},
1653 {AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}},
1654 {ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}},
1655 {AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}},
1656 {AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}},
1657 {AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}},
1658 {ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}},
1659 {ARDMSR, ynone, Pm, opBytes{0x32}},
1660 {ARDPMC, ynone, Pm, opBytes{0x33}},
1661 {ARDTSC, ynone, Pm, opBytes{0x31}},
1662 {ARSM, ynone, Pm, opBytes{0xaa}},
1663 {ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}},
1664 {ASYSRET, ynone, Pm, opBytes{0x07}},
1665 {AWBINVD, ynone, Pm, opBytes{0x09}},
1666 {AWRMSR, ynone, Pm, opBytes{0x30}},
1667 {AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}},
1668 {AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}},
1669 {AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}},
1670 {AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}},
1671 {AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}},
1672 {ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}},
1673 {ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
1674 {ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
1675 {ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
1676 {APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}},
1677 {APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}},
1678 {APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}},
1679 {APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}},
1680 {AMOVQL, yrl_ml, Px, opBytes{0x89}},
1681 {obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}},
1682 {AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}},
1683 {AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}},
1684 {AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}},
1685 {AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}},
1686 {AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}},
1687 {AAESKEYGENASSIST, yxshuf, Pq, opBytes{0x3a, 0xdf, 0}},
1688 {AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}},
1689 {AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}},
1690 {AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}},
1691 {AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}},
1692 {APSHUFD, yxshuf, Pq, opBytes{0x70, 0}},
1693 {APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}},
1694 {APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}},
1695 {APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}},
1696 {AMOVDDUP, yxm, Pf2, opBytes{0x12}},
1697 {AMOVSHDUP, yxm, Pf3, opBytes{0x16}},
1698 {AMOVSLDUP, yxm, Pf3, opBytes{0x12}},
1699 {ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}},
1700 {ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}},
1701 {AUD1, ynone, Pm, opBytes{0xb9, 0}},
1702 {AUD2, ynone, Pm, opBytes{0x0b, 0}},
1703 {AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}},
1704 {ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}},
1705 {ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}},
1706 {ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}},
1707 {ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}},
1708 {ALMSW, ydivl, Pm, opBytes{0x01, 06}},
1709 {ALLDT, ydivl, Pm, opBytes{0x00, 02}},
1710 {ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}},
1711 {ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}},
1712 {ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}},
1713 {ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}},
1714 {ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}},
1715 {AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}},
1716 {AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}},
1717 {AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}},
1718 {AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}},
1719 {AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}},
1720 {AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}},
1721 {AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}},
1722 {AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}},
1723 {AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}},
1724 {AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}},
1725 {AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 05}},
1726 {AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}},
1727 {ASGDT, yclflush, Pm, opBytes{0x01, 00}},
1728 {ASIDT, yclflush, Pm, opBytes{0x01, 01}},
1729 {ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}},
1730 {ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}},
1731 {ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}},
1732 {ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}},
1733 {ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}},
1734 {ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}},
1735 {ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}},
1736 {ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}},
1737 {ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}},
1738 {AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}},
1739 {AMOVBEW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
1740 {AMOVBEL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
1741 {AMOVBEQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}},
1742 {ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}},
1743 {ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}},
1744 {ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}},
1745 {ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}},
1746 {ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}},
1747 {ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}},
1748 {ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}},
1749 {ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}},
1750 {ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}},
1751 {ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}},
1752 {APBLENDVB, yblendvpd, Pq4, opBytes{0x10}},
1753 {ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}},
1754 {ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}},
1755 {ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}},
1756 {ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}},
1757 {ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}},
1758 {ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}},
1759 {ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}},
1760 {ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}},
1761 {ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}},
1762 {ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}},
1763 {ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}},
1764 {AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}},
1765 {AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}},
1766 {AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}},
1767 {AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}},
1768 {ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}},
1769 {ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}},
1770 {ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}},
1771 {ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}},
1772 {ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}},
1773 {ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}},
1774 {ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}},
1775 {ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}},
1776 {ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}},
1777 {ARDPID, yrdrand, Pf3, opBytes{0xc7, 07}},
1779 {ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}},
1780 {ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}},
1781 {AXACQUIRE, ynone, Px, opBytes{0xf2}},
1782 {AXRELEASE, ynone, Px, opBytes{0xf3}},
1783 {AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}},
1784 {AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}},
1785 {AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}},
1786 {AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}},
1787 {AXGETBV, ynone, Pm, opBytes{01, 0xd0}},
1788 {obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}},
1789 {obj.APCDATA, ypcdata, Px, opBytes{0, 0}},
1790 {obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}},
1791 {obj.ADUFFZERO, yduff, Px, opBytes{0xe8}},
1793 {obj.AEND, nil, 0, opBytes{}},
1794 {0, nil, 0, opBytes{}},
// opindex is a fixed-size array of *Optab pointers with (ALAST+1)&obj.AMask slots.
// NOTE(review): presumably indexed by an opcode masked with obj.AMask and
// populated from the instruction table during initialization — confirm at the
// site that fills it; nothing in this declaration shows how it is used.
1797 var opindex [(ALAST + 1) & obj.AMask]*Optab
1799 // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
1800 // This happens on systems like Solaris that call .so functions instead of system calls.
1801 // It does not seem to be necessary for any other systems. This is probably working
1802 // around a Solaris-specific bug that should be fixed differently, but we don't know
1803 // what that bug is. And this does fix it.
//
// Two cases are decided here:
//   - when ctxt.Headtype is objabi.Hsolaris, the answer depends only on whether
//     s.Name starts with "libc_";
//   - for every other head type, the answer is true only when the architecture
//     family is sys.I386 and ctxt.Flag_shared is not set.
1804 func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
1805 if ctxt.Headtype == objabi.Hsolaris {
1806 // All the Solaris dynamic imports from libc.so begin with "libc_".
1807 return strings.HasPrefix(s.Name, "libc_")
// Non-Solaris targets: true only for 386 when not building in shared mode.
1809 return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
1812 // single-instruction no-ops of various lengths.
1813 // constructed by hand and disassembled with gdb to verify.
1814 // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
//
// Each entry is a [16]uint8 whose leading bytes are the NOP encoding; the
// unlisted trailing bytes of the array are zero. fillnop reads the table as
// nop[m-1][:m], so the entry at index m-1 must hold an m-byte NOP.
1815 var nop = [][16]uint8{
1819 {0x0F, 0x1F, 0x40, 0x00}, // 4-byte encoding
1820 {0x0F, 0x1F, 0x44, 0x00, 0x00}, // 5-byte encoding
1821 {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}, // 6-byte encoding
1822 {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}, // 7-byte encoding
1823 {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, // 8-byte encoding
1824 {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, // 9-byte encoding
// The 10-byte encoding below is kept only as a comment and is not part of the
// nop table:
1827 // Native Client rejects the repeated 0x66 prefix.
1828 // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
//
// fillnop writes NOP encodings taken from the nop table into p.
// NOTE(review): only the copy statement is visible here; the code that derives
// m from n is not. Presumably n is the number of bytes to fill and m is the
// size of the chunk currently being written (bounded by len(nop)) — confirm.
1829 func fillnop(p []byte, n int) {
// Write one m-byte NOP: nop[m-1] holds the m-byte encoding in its first m bytes.
1837 copy(p[:m], nop[m-1][:m])
// noppad inserts padding into symbol s: it calls s.Grow with c+pad and then
// has fillnop write into s.P starting at offset c, passing pad as the length.
// ctxt is not referenced by the statements visible here.
// NOTE(review): the return statement is not visible; presumably the result is
// the offset just past the padding (c+pad), since padJump returns it as the
// updated offset — confirm.
1843 func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
// The sum is formed in int64 so that c+pad cannot wrap around in int32.
1844 s.Grow(int64(c) + int64(pad))
1845 fillnop(s.P[c:], int(pad))
// spadjop chooses between the two opcodes l and q according to the target
// architecture described by ctxt.
// NOTE(review): only the guard is visible. Presumably l (the 32-bit form) is
// returned when the guard holds and q (the 64-bit form) otherwise — confirm.
1849 func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
// True when the target is not AMD64, or when pointers are 4 bytes wide.
1850 if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
1856 // isJump returns whether p is a jump instruction.
1857 // It is used to ensure that no standalone or macro-fused jump will straddle
1858 // or end on a 32 byte boundary by inserting NOPs before the jumps.
//
// An instruction counts as a jump when its To operand has a non-nil target, or
// when its opcode is JMP, CALL, RET, DUFFCOPY or DUFFZERO regardless of target.
1859 func isJump(p *obj.Prog) bool {
1860 return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL ||
1861 p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO
1864 // lookForJCC returns the first real instruction starting from p, if that instruction is a conditional
1865 // jump. Otherwise, nil is returned.
//
// The scan begins at p.Link, so p itself is never the instruction examined.
1866 func lookForJCC(p *obj.Prog) *obj.Prog {
1867 // Skip any PCDATA, FUNCDATA or NOP instructions
1869 for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link {
// Give up when the list ended, when q has no branch target, or when the
// starting instruction p is a JMP or CALL.
// NOTE(review): the JMP/CALL test reads p.As (the starting instruction), not
// q.As — confirm that this is intended.
1872 if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL {
// The conditional jump opcodes recognized by this function.
1877 case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI,
1878 AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT:
1886 // fusedJump determines whether p can be fused with a subsequent conditional jump instruction.
1887 // If it can, we return true followed by the total size of the fused jump. If it can't, we return false.
1888 // Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2.
//
// The reported size is accumulated in fusedSize from the Isize of any leading
// prefix instructions, the first instruction of the pair and the conditional jump.
1889 func fusedJump(p *obj.Prog) (bool, uint8) {
1892 // The first instruction in a macro fused pair may be preceded by the LOCK prefix,
1893 // or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we
1894 // need to be careful to insert any padding before the locks rather than directly after them.
1896 if p.As == AXRELEASE || p.As == AXACQUIRE {
// Count the prefix, then advance past PCDATA/FUNCDATA pseudo-instructions.
1897 fusedSize += p.Isize
1898 for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
// Same accounting and skip for a second prefix.
// NOTE(review): the guarding condition is not visible here; per the comment
// above it is presumably the LOCK prefix — confirm.
1905 fusedSize += p.Isize
1906 for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
// Classify the candidate first instruction by opcode family.
// cmp: any width of CMP.
1912 cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW
// cmpAddSub: any width of ADD or SUB, or a CMP.
1914 cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ ||
1915 p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp
// testAnd: any width of TEST or AND.
1917 testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW ||
1918 p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW
// incDec: any width of INC or DEC.
1920 incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW ||
1921 p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW
// Instructions outside all three families are handled separately here.
1923 if !cmpAddSub && !testAnd && !incDec {
// Operand-shape check: argOne and argTwo are the operand types taken from
// p.From and p.To.
// NOTE(review): the statements selecting which operand feeds which variable
// are only partly visible — confirm the ordering for CMP versus the others.
1928 var argOne obj.AddrType
1929 var argTwo obj.AddrType
1931 argOne = p.From.Type
1935 argTwo = p.From.Type
// A register first operand is tested against register, constant or memory.
1937 if argOne == obj.TYPE_REG {
1938 if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM {
// A memory first operand is tested against register only.
1941 } else if argOne == obj.TYPE_MEM {
1942 if argTwo != obj.TYPE_REG {
// Count the first instruction itself, then look for the conditional jump
// that follows it.
1950 fusedSize += p.Isize
1951 jmp := lookForJCC(p)
// Count the conditional jump as part of the fused pair.
1956 fusedSize += jmp.Isize
1959 return true, fusedSize
// The remaining tests inspect the jump's condition: first the
// overflow/sign/parity conditions, then the carry-based conditions.
// NOTE(review): the branches taken when these tests succeed are not visible
// here; presumably they reject fusion for some instruction families — confirm.
1962 if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI ||
1963 jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC {
1968 return true, fusedSize
1971 if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS {
1975 return true, fusedSize
// padJumpsCtx is the jump-padding setting for one assembly run. Its value is
// the byte boundary that jumps are padded against (makePjcCtx uses 32), and
// makePjcCtx returns 0 to disable padding.
type padJumpsCtx int32
1980 func makePjcCtx(ctxt *obj.Link) padJumpsCtx {
1981 // Disable jump padding on 32 bit builds by setting
1983 if ctxt.Arch.Family == sys.I386 {
1984 return padJumpsCtx(0)
1987 // Disable jump padding for hand written assembly code.
1989 return padJumpsCtx(0)
1992 return padJumpsCtx(32)
1995 // padJump detects whether the instruction being assembled is a standalone or a macro-fused
1996 // jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does
1997 // not cross or end on a 32 byte boundary.
1998 func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 {
2004 fj, fjSize := fusedJump(p)
2005 mask := int32(pjc - 1)
2007 if (c&mask)+int32(fjSize) >= int32(pjc) {
2008 toPad = int32(pjc) - (c & mask)
2010 } else if isJump(p) {
2011 if (c&mask)+int32(p.Isize) >= int32(pjc) {
2012 toPad = int32(pjc) - (c & mask)
2019 return noppad(ctxt, s, c, toPad)
2022 // reAssemble is called if an instruction's size changes during assembly. If
2023 // it does and the instruction is a standalone or a macro-fused jump we need to
2025 func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool {
2030 fj, _ := fusedJump(p)
2031 return fj || isJump(p)
2034 type nopPad struct {
2035 p *obj.Prog // Instruction before the pad
2036 n int32 // Size of the pad
2039 func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
2040 if ctxt.Retpoline && ctxt.Arch.Family == sys.I386 {
2041 ctxt.Diag("-spectre=ret not supported on 386")
2042 ctxt.Retpoline = false // don't keep printing
2045 pjc := makePjcCtx(ctxt)
2052 ctxt.Diag("x86 tables not initialized, call x86.instinit first")
2055 for p := s.Func().Text; p != nil; p = p.Link {
2056 if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil {
2060 p.To.Type = obj.TYPE_REG
2062 // Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive.
2063 // One exception: It is smaller to encode $-0x80 than $0x80.
2064 // For that case, flip the sign and the op:
2065 // Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'.
2066 switch v := p.From.Offset; {
2069 case v == 0x80 || (v < 0 && v != -0x80):
2070 p.As = spadjop(ctxt, AADDL, AADDQ)
2073 p.As = spadjop(ctxt, ASUBL, ASUBQ)
2076 if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) {
2077 if p.To.Type != obj.TYPE_REG {
2078 ctxt.Diag("non-retpoline-compatible: %v", p)
2081 p.To.Type = obj.TYPE_BRANCH
2082 p.To.Name = obj.NAME_EXTERN
2083 p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg)))
2089 var count int64 // rough count of number of instructions
2090 for p := s.Func().Text; p != nil; p = p.Link {
2092 p.Back = branchShort // use short branches first time through
2093 if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) {
2094 p.Back |= branchBackwards
2095 q.Back |= branchLoopHead
2098 s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction
2103 errors := ctxt.Errors
2104 var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies)
2105 nrelocs0 := len(s.R)
// This loop continues while there are reasons to re-assemble the
// whole block, like the presence of long forward jumps.
2110 for i := range s.R[nrelocs0:] {
2111 s.R[nrelocs0+i] = obj.Reloc{}
2113 s.R = s.R[:nrelocs0] // preserve marker relocations generated by the compiler
2118 for p := s.Func().Text; p != nil; p = p.Link {
2120 c = pjc.padJump(ctxt, s, p, c)
2122 if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
2124 v := -c & (loopAlign - 1)
2126 if v <= maxLoopPad {
2127 s.Grow(int64(c) + int64(v))
2128 fillnop(s.P[c:], int(v))
2135 // process forward jumps to p
2136 for q := p.Rel; q != nil; q = q.Forwd {
2137 v := int32(p.Pc - (q.Pc + int64(q.Isize)))
2138 if q.Back&branchShort != 0 {
2141 q.Back ^= branchShort
2144 if q.As == AJCXZL || q.As == AXBEGIN {
2145 s.P[q.Pc+2] = byte(v)
2147 s.P[q.Pc+1] = byte(v)
2150 binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
2157 ab.asmins(ctxt, s, p)
2159 if int(p.Isize) != m {
2161 if pjc.reAssemble(p) {
2162 // We need to re-assemble here to check for jumps and fused jumps
2163 // that span or end on 32 byte boundaries.
2168 s.Grow(p.Pc + int64(m))
2169 copy(s.P[p.Pc:], ab.Bytes())
2170 // If there was padding, remember it.
2171 if pPrev != nil && !ctxt.IsAsm && c > c0 {
2172 nops = append(nops, nopPad{p: pPrev, n: c - c0})
2180 ctxt.Diag("span must be looping")
2186 if ctxt.Errors > errors {
2190 // splice padding nops into Progs
2191 for _, n := range nops {
2193 np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)}
2199 if false { /* debug['a'] > 1 */
2200 fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
2202 for i = 0; i < len(s.P); i++ {
2203 fmt.Printf(" %.2x", s.P[i])
2205 fmt.Printf("\n %.6x", uint(i+1))
2213 for i := 0; i < len(s.R); i++ {
2215 fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
2219 // Mark nonpreemptible instruction sequences.
2220 // The 2-instruction TLS access sequence
2222 // MOVQ 0(BX)(TLS*1), BX
2223 // is not async preemptible, as if it is preempted and resumed on
2224 // a different thread, the TLS address may become invalid.
2225 if !CanUse1InsnTLS(ctxt) {
2226 useTLS := func(p *obj.Prog) bool {
2227 // Only need to mark the second instruction, which has
2228 // REG_TLS as Index. (It is okay to interrupt and restart
2229 // the first instruction.)
2230 return p.From.Index == REG_TLS
2232 obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil)
2235 // Now that we know byte offsets, we can generate jump table entries.
2236 // TODO: could this live in obj instead of obj/$ARCH?
2237 for _, jt := range s.Func().JumpTables {
2238 for i, p := range jt.Targets {
2239 // The ith jumptable entry points to the p.Pc'th
2240 // byte in the function symbol s.
2241 jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, s, p.Pc)
2246 func instinit(ctxt *obj.Link) {
2248 // Already initialized; stop now.
2249 // This happens in the cmd/asm tests,
2250 // each of which re-initializes the arch.
2254 switch ctxt.Headtype {
2256 plan9privates = ctxt.Lookup("_privates")
2259 for i := range avxOptab {
2261 if opindex[c&obj.AMask] != nil {
2262 ctxt.Diag("phase error in avxOptab: %d (%v)", i, c)
2264 opindex[c&obj.AMask] = &avxOptab[i]
2266 for i := 1; optab[i].as != 0; i++ {
2268 if opindex[c&obj.AMask] != nil {
2269 ctxt.Diag("phase error in optab: %d (%v)", i, c)
2271 opindex[c&obj.AMask] = &optab[i]
2274 for i := 0; i < Ymax; i++ {
2275 ycover[i*Ymax+i] = 1
2278 ycover[Yi0*Ymax+Yu2] = 1
2279 ycover[Yi1*Ymax+Yu2] = 1
2281 ycover[Yi0*Ymax+Yi8] = 1
2282 ycover[Yi1*Ymax+Yi8] = 1
2283 ycover[Yu2*Ymax+Yi8] = 1
2284 ycover[Yu7*Ymax+Yi8] = 1
2286 ycover[Yi0*Ymax+Yu7] = 1
2287 ycover[Yi1*Ymax+Yu7] = 1
2288 ycover[Yu2*Ymax+Yu7] = 1
2290 ycover[Yi0*Ymax+Yu8] = 1
2291 ycover[Yi1*Ymax+Yu8] = 1
2292 ycover[Yu2*Ymax+Yu8] = 1
2293 ycover[Yu7*Ymax+Yu8] = 1
2295 ycover[Yi0*Ymax+Ys32] = 1
2296 ycover[Yi1*Ymax+Ys32] = 1
2297 ycover[Yu2*Ymax+Ys32] = 1
2298 ycover[Yu7*Ymax+Ys32] = 1
2299 ycover[Yu8*Ymax+Ys32] = 1
2300 ycover[Yi8*Ymax+Ys32] = 1
2302 ycover[Yi0*Ymax+Yi32] = 1
2303 ycover[Yi1*Ymax+Yi32] = 1
2304 ycover[Yu2*Ymax+Yi32] = 1
2305 ycover[Yu7*Ymax+Yi32] = 1
2306 ycover[Yu8*Ymax+Yi32] = 1
2307 ycover[Yi8*Ymax+Yi32] = 1
2308 ycover[Ys32*Ymax+Yi32] = 1
2310 ycover[Yi0*Ymax+Yi64] = 1
2311 ycover[Yi1*Ymax+Yi64] = 1
2312 ycover[Yu7*Ymax+Yi64] = 1
2313 ycover[Yu2*Ymax+Yi64] = 1
2314 ycover[Yu8*Ymax+Yi64] = 1
2315 ycover[Yi8*Ymax+Yi64] = 1
2316 ycover[Ys32*Ymax+Yi64] = 1
2317 ycover[Yi32*Ymax+Yi64] = 1
2319 ycover[Yal*Ymax+Yrb] = 1
2320 ycover[Ycl*Ymax+Yrb] = 1
2321 ycover[Yax*Ymax+Yrb] = 1
2322 ycover[Ycx*Ymax+Yrb] = 1
2323 ycover[Yrx*Ymax+Yrb] = 1
2324 ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
2326 ycover[Ycl*Ymax+Ycx] = 1
2328 ycover[Yax*Ymax+Yrx] = 1
2329 ycover[Ycx*Ymax+Yrx] = 1
2331 ycover[Yax*Ymax+Yrl] = 1
2332 ycover[Ycx*Ymax+Yrl] = 1
2333 ycover[Yrx*Ymax+Yrl] = 1
2334 ycover[Yrl32*Ymax+Yrl] = 1
2336 ycover[Yf0*Ymax+Yrf] = 1
2338 ycover[Yal*Ymax+Ymb] = 1
2339 ycover[Ycl*Ymax+Ymb] = 1
2340 ycover[Yax*Ymax+Ymb] = 1
2341 ycover[Ycx*Ymax+Ymb] = 1
2342 ycover[Yrx*Ymax+Ymb] = 1
2343 ycover[Yrb*Ymax+Ymb] = 1
2344 ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
2345 ycover[Ym*Ymax+Ymb] = 1
2347 ycover[Yax*Ymax+Yml] = 1
2348 ycover[Ycx*Ymax+Yml] = 1
2349 ycover[Yrx*Ymax+Yml] = 1
2350 ycover[Yrl*Ymax+Yml] = 1
2351 ycover[Yrl32*Ymax+Yml] = 1
2352 ycover[Ym*Ymax+Yml] = 1
2354 ycover[Yax*Ymax+Ymm] = 1
2355 ycover[Ycx*Ymax+Ymm] = 1
2356 ycover[Yrx*Ymax+Ymm] = 1
2357 ycover[Yrl*Ymax+Ymm] = 1
2358 ycover[Yrl32*Ymax+Ymm] = 1
2359 ycover[Ym*Ymax+Ymm] = 1
2360 ycover[Ymr*Ymax+Ymm] = 1
2362 ycover[Yxr0*Ymax+Yxr] = 1
2364 ycover[Ym*Ymax+Yxm] = 1
2365 ycover[Yxr0*Ymax+Yxm] = 1
2366 ycover[Yxr*Ymax+Yxm] = 1
2368 ycover[Ym*Ymax+Yym] = 1
2369 ycover[Yyr*Ymax+Yym] = 1
2371 ycover[Yxr0*Ymax+YxrEvex] = 1
2372 ycover[Yxr*Ymax+YxrEvex] = 1
2374 ycover[Ym*Ymax+YxmEvex] = 1
2375 ycover[Yxr0*Ymax+YxmEvex] = 1
2376 ycover[Yxr*Ymax+YxmEvex] = 1
2377 ycover[YxrEvex*Ymax+YxmEvex] = 1
2379 ycover[Yyr*Ymax+YyrEvex] = 1
2381 ycover[Ym*Ymax+YymEvex] = 1
2382 ycover[Yyr*Ymax+YymEvex] = 1
2383 ycover[YyrEvex*Ymax+YymEvex] = 1
2385 ycover[Ym*Ymax+Yzm] = 1
2386 ycover[Yzr*Ymax+Yzm] = 1
2388 ycover[Yk0*Ymax+Yk] = 1
2389 ycover[Yknot0*Ymax+Yk] = 1
2391 ycover[Yk0*Ymax+Ykm] = 1
2392 ycover[Yknot0*Ymax+Ykm] = 1
2393 ycover[Yk*Ymax+Ykm] = 1
2394 ycover[Ym*Ymax+Ykm] = 1
2396 ycover[Yxvm*Ymax+YxvmEvex] = 1
2398 ycover[Yyvm*Ymax+YyvmEvex] = 1
2400 for i := 0; i < MAXREG; i++ {
2402 if i >= REG_AL && i <= REG_R15B {
2403 reg[i] = (i - REG_AL) & 7
2404 if i >= REG_SPB && i <= REG_DIB {
2407 if i >= REG_R8B && i <= REG_R15B {
2408 regrex[i] = Rxr | Rxx | Rxb
2412 if i >= REG_AH && i <= REG_BH {
2413 reg[i] = 4 + ((i - REG_AH) & 7)
2415 if i >= REG_AX && i <= REG_R15 {
2416 reg[i] = (i - REG_AX) & 7
2418 regrex[i] = Rxr | Rxx | Rxb
2422 if i >= REG_F0 && i <= REG_F0+7 {
2423 reg[i] = (i - REG_F0) & 7
2425 if i >= REG_M0 && i <= REG_M0+7 {
2426 reg[i] = (i - REG_M0) & 7
2428 if i >= REG_K0 && i <= REG_K0+7 {
2429 reg[i] = (i - REG_K0) & 7
2431 if i >= REG_X0 && i <= REG_X0+15 {
2432 reg[i] = (i - REG_X0) & 7
2434 regrex[i] = Rxr | Rxx | Rxb
2437 if i >= REG_X16 && i <= REG_X16+15 {
2438 reg[i] = (i - REG_X16) & 7
2440 regrex[i] = Rxr | Rxx | Rxb | RxrEvex
2445 if i >= REG_Y0 && i <= REG_Y0+15 {
2446 reg[i] = (i - REG_Y0) & 7
2448 regrex[i] = Rxr | Rxx | Rxb
2451 if i >= REG_Y16 && i <= REG_Y16+15 {
2452 reg[i] = (i - REG_Y16) & 7
2454 regrex[i] = Rxr | Rxx | Rxb | RxrEvex
2459 if i >= REG_Z0 && i <= REG_Z0+15 {
2460 reg[i] = (i - REG_Z0) & 7
2462 regrex[i] = Rxr | Rxx | Rxb
2465 if i >= REG_Z16 && i <= REG_Z16+15 {
2466 reg[i] = (i - REG_Z16) & 7
2468 regrex[i] = Rxr | Rxx | Rxb | RxrEvex
2474 if i >= REG_CR+8 && i <= REG_CR+15 {
// isAndroid reports whether buildcfg.GOOS is "android". It is consulted when
// choosing how TLS references are relocated.
var isAndroid = buildcfg.GOOS == "android"
2482 func prefixof(ctxt *obj.Link, a *obj.Addr) int {
2483 if a.Reg < REG_CS && a.Index < REG_CS { // fast path
2486 if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
2504 // NOTE: Systems listed here should be only systems that
2505 // support direct TLS references like 8(TLS) implemented as
2506 // direct references from FS or GS. Systems that require
2507 // the initial-exec model, where you load the TLS base into
2508 // a register and then index from that register, do not reach
2509 // this code and should not be listed.
2510 if ctxt.Arch.Family == sys.I386 {
2511 switch ctxt.Headtype {
2516 log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
2518 case objabi.Hdarwin,
2527 switch ctxt.Headtype {
2529 log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
2536 if ctxt.Flag_shared {
2537 log.Fatalf("unknown TLS base register for linux with -shared")
2542 case objabi.Hdragonfly,
2549 case objabi.Hdarwin:
2566 if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
2567 // When building for inclusion into a shared library, an instruction of the form
2568 // MOV off(CX)(TLS*1), AX
2570 // mov %gs:off(%ecx), %eax // on i386
2571 // mov %fs:off(%rcx), %rax // on amd64
2572 // which assumes that the correct TLS offset has been loaded into CX (today
2573 // there is only one TLS variable -- g -- so this is OK). When not building for
// a shared library the instruction becomes
2575 // mov 0x0(%ecx), %eax // on i386
2576 // mov 0x0(%rcx), %rax // on amd64
2577 // and a R_TLS_LE relocation, and so does not require a prefix.
2578 if ctxt.Arch.Family == sys.I386 {
2594 // oclassRegList returns multisource operand class for addr.
2595 func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int {
2596 // TODO(quasilyte): when oclass register case is refactored into
2597 // lookup table, use it here to get register kind more easily.
2598 // Helper functions like regIsXmm should go away too (they will become redundant).
2600 regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 }
2601 regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 }
2602 regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 }
2604 reg0, reg1 := decodeRegisterRange(addr.Offset)
2605 low := regIndex(int16(reg0))
2606 high := regIndex(int16(reg1))
2608 if ctxt.Arch.Family == sys.I386 {
2609 if low >= 8 || high >= 8 {
2617 case regIsXmm(reg0) && regIsXmm(reg1):
2618 return YxrEvexMulti4
2619 case regIsYmm(reg0) && regIsYmm(reg1):
2620 return YyrEvexMulti4
2621 case regIsZmm(reg0) && regIsZmm(reg1):
2631 // oclassVMem returns V-mem (vector memory with VSIB) operand class.
2632 // For addr that is not V-mem returns (Yxxx, false).
2633 func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) {
2652 if ctxt.Arch.Family == sys.I386 {
2672 if ctxt.Arch.Family == sys.I386 {
2675 return YxvmEvex, true
2694 if ctxt.Arch.Family == sys.I386 {
2714 if ctxt.Arch.Family == sys.I386 {
2717 return YyvmEvex, true
2752 if ctxt.Arch.Family == sys.I386 {
2761 func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
2763 case obj.TYPE_REGLIST:
2764 return oclassRegList(ctxt, a)
2769 case obj.TYPE_BRANCH:
2772 case obj.TYPE_INDIR:
2773 if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
2779 // Pseudo registers have negative index, but SP is
2780 // not pseudo on x86, hence REG_SP check is not redundant.
2781 if a.Index == REG_SP || a.Index < 0 {
2782 // Can't use FP/SB/PC/SP as the index register.
2786 if vmem, ok := oclassVMem(ctxt, a); ok {
2790 if ctxt.Arch.Family == sys.AMD64 {
2792 case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
2793 // Global variables can't use index registers and their
2794 // base register is %rip (%rip is encoded as REG_NONE).
2795 if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
2798 case obj.NAME_AUTO, obj.NAME_PARAM:
2799 // These names must have a base of SP. The old compiler
2800 // uses 0 for the base register. SSA uses REG_SP.
2801 if a.Reg != REG_SP && a.Reg != 0 {
2815 case obj.NAME_GOTREF:
2816 ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
2819 case obj.NAME_EXTERN,
2821 if a.Sym != nil && useAbs(ctxt, a.Sym) {
2824 return Yiauto // use pc-relative addressing
2831 // TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
2832 // and got Yi32 in an earlier version of this code.
2833 // Keep doing that until we fix yduff etc.
2834 if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
2838 if a.Sym != nil || a.Name != obj.NAME_NONE {
2839 ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
2843 case obj.TYPE_CONST:
2845 ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
2849 if ctxt.Arch.Family == sys.I386 {
2857 case v >= 0 && v <= 3:
2859 case v >= 0 && v <= 127:
2861 case v >= 0 && v <= 255:
2863 case v >= -128 && v <= 127:
2866 if ctxt.Arch.Family == sys.I386 {
2871 return Ys32 // can sign extend
2874 return Yi32 // unsigned
2878 case obj.TYPE_TEXTSIZE:
2882 if a.Type != obj.TYPE_REG {
2883 ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
2908 if ctxt.Arch.Family == sys.I386 {
2927 case REG_DX, REG_BX:
2930 case REG_R8, // not really Yrl
2938 if ctxt.Arch.Family == sys.I386 {
2943 case REG_SP, REG_BP, REG_SI, REG_DI:
2944 if ctxt.Arch.Family == sys.I386 {
3079 if ctxt.Arch.Family == sys.I386 {
3179 // AsmBuf is a simple buffer to assemble variable-length x86 instructions into
3180 // and hold assembly state.
3181 type AsmBuf struct {
3185 vexflag bool // Per inst: true for VEX-encoded
3186 evexflag bool // Per inst: true for EVEX-encoded
3191 evex evexBits // Initialized when evexflag is true
3194 // Put1 appends one byte to the end of the buffer.
3195 func (ab *AsmBuf) Put1(x byte) {
3200 // Put2 appends two bytes to the end of the buffer.
3201 func (ab *AsmBuf) Put2(x, y byte) {
3202 ab.buf[ab.off+0] = x
3203 ab.buf[ab.off+1] = y
3207 // Put3 appends three bytes to the end of the buffer.
3208 func (ab *AsmBuf) Put3(x, y, z byte) {
3209 ab.buf[ab.off+0] = x
3210 ab.buf[ab.off+1] = y
3211 ab.buf[ab.off+2] = z
3215 // Put4 appends four bytes to the end of the buffer.
3216 func (ab *AsmBuf) Put4(x, y, z, w byte) {
3217 ab.buf[ab.off+0] = x
3218 ab.buf[ab.off+1] = y
3219 ab.buf[ab.off+2] = z
3220 ab.buf[ab.off+3] = w
3224 // PutInt16 writes v into the buffer using little-endian encoding.
3225 func (ab *AsmBuf) PutInt16(v int16) {
3226 ab.buf[ab.off+0] = byte(v)
3227 ab.buf[ab.off+1] = byte(v >> 8)
3231 // PutInt32 writes v into the buffer using little-endian encoding.
3232 func (ab *AsmBuf) PutInt32(v int32) {
3233 ab.buf[ab.off+0] = byte(v)
3234 ab.buf[ab.off+1] = byte(v >> 8)
3235 ab.buf[ab.off+2] = byte(v >> 16)
3236 ab.buf[ab.off+3] = byte(v >> 24)
3240 // PutInt64 writes v into the buffer using little-endian encoding.
3241 func (ab *AsmBuf) PutInt64(v int64) {
3242 ab.buf[ab.off+0] = byte(v)
3243 ab.buf[ab.off+1] = byte(v >> 8)
3244 ab.buf[ab.off+2] = byte(v >> 16)
3245 ab.buf[ab.off+3] = byte(v >> 24)
3246 ab.buf[ab.off+4] = byte(v >> 32)
3247 ab.buf[ab.off+5] = byte(v >> 40)
3248 ab.buf[ab.off+6] = byte(v >> 48)
3249 ab.buf[ab.off+7] = byte(v >> 56)
3253 // Put copies b into the buffer.
3254 func (ab *AsmBuf) Put(b []byte) {
3255 copy(ab.buf[ab.off:], b)
3259 // PutOpBytesLit writes zero terminated sequence of bytes from op,
3260 // starting at specified offset (e.g. z counter value).
3261 // Trailing 0 is not written.
3263 // Intended to be used for literal Z cases.
3264 // Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
3265 func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
3266 for int(op[offset]) != 0 {
3267 ab.Put1(byte(op[offset]))
3272 // Insert inserts b at offset i.
3273 func (ab *AsmBuf) Insert(i int, b byte) {
3275 copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
3279 // Last returns the byte at the end of the buffer.
3280 func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }
3282 // Len returns the length of the buffer.
3283 func (ab *AsmBuf) Len() int { return ab.off }
3285 // Bytes returns the contents of the buffer.
3286 func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }
3288 // Reset empties the buffer.
3289 func (ab *AsmBuf) Reset() { ab.off = 0 }
3291 // At returns the byte at offset i.
3292 func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }
3294 // asmidx emits SIB byte.
3295 func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
3298 // X/Y index register is used in VSIB.
3387 if ctxt.Arch.Family == sys.I386 {
3448 case REG_NONE: // must be mod=00
3459 if ctxt.Arch.Family == sys.I386 {
3479 ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
3483 func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
3486 v := vaddr(ctxt, p, a, &rel)
3489 ctxt.Diag("bad reloc")
3491 r := obj.Addrel(cursym)
3493 r.Off = int32(p.Pc + int64(ab.Len()))
3496 ab.PutInt32(int32(v))
3499 func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
3505 case obj.NAME_STATIC,
3510 ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
3514 if a.Name == obj.NAME_GOTREF {
3516 r.Type = objabi.R_GOTPCREL
3517 } else if useAbs(ctxt, s) {
3519 r.Type = objabi.R_ADDR
3522 r.Type = objabi.R_PCREL
3525 r.Off = -1 // caller must fill in
3532 if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
3534 ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
3538 if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
3539 r.Type = objabi.R_TLS_LE
3541 r.Off = -1 // caller must fill in
3550 func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
3555 if a.Offset != int64(int32(a.Offset)) {
3556 // The rules are slightly different for 386 and AMD64,
3557 // mostly for historical reasons. We may unify them later,
3558 // but it must be discussed beforehand.
3560 // For 64bit mode only LEAL is allowed to overflow.
3561 // It's how https://golang.org/cl/59630 made it.
3562 // crypto/sha1/sha1block_amd64.s depends on this feature.
3564 // For 32bit mode rules are more permissive.
3565 // If offset fits uint32, it's permitted.
3566 // This is allowed for assembly that wants to use 32-bit hex
3567 // constants, e.g. LEAL 0x99999999(AX), AX.
3568 overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
3569 (ctxt.Arch.Family != sys.AMD64 &&
3570 int64(uint32(a.Offset)) == a.Offset &&
3571 ab.rexflag&Rxw == 0)
3573 ctxt.Diag("offset too large in %s", p)
3576 v := int32(a.Offset)
3581 if a.Name == obj.NAME_NONE {
3582 ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
3584 if a.Index == REG_TLS {
3585 ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
3590 const regFirst = REG_AL
3591 const regLast = REG_Z31
3592 if a.Reg < regFirst || regLast < a.Reg {
3598 ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
3599 ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
3603 if a.Type != obj.TYPE_MEM {
3607 if a.Index != REG_NONE && a.Index != REG_TLS && !(REG_CS <= a.Index && a.Index <= REG_GS) {
3610 case obj.NAME_EXTERN,
3613 if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
3616 if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
3617 // The base register has already been set. It holds the PC
3618 // of this instruction returned by a PC-reading thunk.
3619 // See obj6.go:rewriteToPcrel.
3623 v = int32(vaddr(ctxt, p, a, &rel))
3630 ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
3631 if base == REG_NONE {
3632 ab.Put1(byte(0<<6 | 4<<0 | r<<3))
3633 ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
3637 if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
3638 ab.Put1(byte(0<<6 | 4<<0 | r<<3))
3639 ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
3643 if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
3644 ab.Put1(byte(1<<6 | 4<<0 | r<<3))
3645 ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
3650 ab.Put1(byte(2<<6 | 4<<0 | r<<3))
3651 ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
3657 case obj.NAME_STATIC,
3661 ctxt.Diag("bad addr: %v", p)
3663 if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
3664 // The base register has already been set. It holds the PC
3665 // of this instruction returned by a PC-reading thunk.
3666 // See obj6.go:rewriteToPcrel.
3670 v = int32(vaddr(ctxt, p, a, &rel))
3677 if base == REG_TLS {
3678 v = int32(vaddr(ctxt, p, a, &rel))
3681 ab.rexflag |= regrex[base]&Rxb | rex
3682 if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
3683 if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
3684 if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
3685 ctxt.Diag("%v has offset against gotref", p)
3687 ab.Put1(byte(0<<6 | 5<<0 | r<<3))
3693 byte(0<<6|4<<0|r<<3), // sib present
3694 0<<6|4<<3|5<<0, // DS:d32
3699 if base == REG_SP || base == REG_R12 {
3701 ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
3702 ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
3706 if disp8, ok := toDisp8(v, p, ab); ok {
3707 ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
3708 ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
3713 ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
3714 ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
3718 if REG_AX <= base && base <= REG_R15 {
3719 if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid &&
3720 ctxt.Headtype != objabi.Hwindows {
3722 rel.Type = objabi.R_TLS_LE
3729 if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
3730 ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
3734 if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
3735 ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
3739 ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
3748 ctxt.Diag("bad rel")
3752 r := obj.Addrel(cursym)
3754 r.Off = int32(p.Pc + int64(ab.Len()))
3761 ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
3764 func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
3765 ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
3768 func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
3769 ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
3772 func bytereg(a *obj.Addr, t *uint8) {
3773 if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
3774 a.Reg += REG_AL - REG_AX
3779 func unbytereg(a *obj.Addr, t *uint8) {
3780 if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
3781 a.Reg += REG_AX - REG_AL
3787 movLit uint8 = iota // Like Zlit
3792 movFullPtr // Load full pointer, trash heap (unsupported)
3797 var ymovtab = []movtab{
3799 {APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
3800 {APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
3801 {APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
3802 {APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
3803 {APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
3804 {APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
3805 {APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
3806 {APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
3807 {APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
3808 {APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
3809 {APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
3810 {APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
3811 {APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
3812 {APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},
3815 {APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
3816 {APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
3817 {APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
3818 {APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
3819 {APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
3820 {APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
3821 {APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
3822 {APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
3823 {APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
3824 {APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
3825 {APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
3826 {APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},
3829 {AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
3830 {AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
3831 {AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
3832 {AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
3833 {AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
3834 {AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
3835 {AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
3836 {AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
3837 {AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
3838 {AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
3839 {AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
3840 {AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},
3843 {AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
3844 {AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
3845 {AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
3846 {AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
3847 {AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
3848 {AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
3849 {AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
3850 {AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
3851 {AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
3852 {AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
3853 {AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
3854 {AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
3855 {AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
3856 {AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
3857 {AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
3858 {AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
3859 {AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
3860 {AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
3861 {AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
3862 {AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
3865 {AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
3866 {AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
3867 {AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
3868 {AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
3869 {AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
3870 {AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
3871 {AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
3872 {AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
3873 {AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
3874 {AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
3875 {AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
3876 {AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
3877 {AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
3878 {AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
3879 {AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
3880 {AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
3883 {AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
3884 {AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
3885 {AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
3886 {AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},
3888 // lgdt, sgdt, lidt, sidt
3889 {AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
3890 {AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
3891 {AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
3892 {AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
3893 {AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
3894 {AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
3895 {AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
3896 {AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
3899 {AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
3900 {AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},
3903 {AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
3904 {AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},
3907 {AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
3908 {AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},
3910 /* load full pointer - unsupported
3911 {AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
3912 {AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
3916 {ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
3917 {ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
3918 {ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
3919 {ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
3920 {ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
3921 {ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
3922 {ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
3923 {ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
3924 {ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
3925 {ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
3926 {ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
3927 {ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
3928 {ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
3929 {ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
3930 {ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
3931 {ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
3932 {ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
3933 {ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
3936 {AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
3937 {AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
3938 {0, 0, 0, 0, 0, [4]uint8{}},
// isax checks whether the operand a involves the AX register family.
// The visible lines show a case arm matching REG_AX, REG_AL and REG_AH, and a
// final result that is true exactly when a.Index is REG_AX.
// NOTE(review): the switch header and the body of the case arm are missing
// from this extract (presumably a switch on a.Reg that returns true) — confirm
// against the complete file.
3941 func isax(a *obj.Addr) bool {
3943 case REG_AX, REG_AL, REG_AH:
// Not matched above: the operand still counts when AX is its index register.
3947 return a.Index == REG_AX
// subreg substitutes register to for register from in the operands of p.
// Four fields are examined in turn: p.From.Reg, p.To.Reg, p.From.Index and
// p.To.Index. Each one is compared (as an int) with from and, on a match,
// overwritten with int16(to). p is modified in place; nothing is returned.
// NOTE(review): lines between the visible statements are missing from this
// extract, so each if-body may do more than the single assignment shown.
3950 func subreg(p *obj.Prog, from int, to int) {
// Debug trace, disabled by the constant-false condition: would print p and the
// from/to substitution before rewriting.
3951 if false { /* debug['Q'] */
3952 fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
// Base register of the source operand.
3955 if int(p.From.Reg) == from {
3956 p.From.Reg = int16(to)
// Base register of the destination operand.
3960 if int(p.To.Reg) == from {
3961 p.To.Reg = int16(to)
// Index register of the source operand.
3965 if int(p.From.Index) == from {
3966 p.From.Index = int16(to)
// Index register of the destination operand.
3970 if int(p.To.Index) == from {
3971 p.To.Index = int16(to)
// Debug trace, disabled: would print p after rewriting.
3975 if false { /* debug['Q'] */
3976 fmt.Printf("%v\n", p)
// mediaop takes the opcode table entry o, an opcode value op, a size osize and
// an index z, and returns an int.
// NOTE(review): almost all of the body is missing from this extract. Only two
// fragments are visible: a case arm for the prefix values Pm, Pe, Pf2 and Pf3,
// and a test of whether the buffer is empty or its last byte differs from Pm.
// One caller visible in this chunk assigns the result back to its own z, so
// the return value is presumably an updated opcode index — confirm.
3980 func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
3982 case Pm, Pe, Pf2, Pf3:
// True when nothing has been emitted yet or the most recent byte is not Pm.
3995 if ab.Len() == 0 || ab.Last() != Pm {
// bpduff1 holds the raw machine code for two instructions, ten bytes total:
//
//	MOVQ BP, -16(SP)
//	LEAQ -16(SP), BP
//
// NOTE(review): presumably emitted before a duffcopy/duffzero call on AMD64 so
// that BP is saved around the call; the use site is not visible here — confirm.
var bpduff1 = []byte{
	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
}
// bpduff2 holds the raw machine code for a single four-byte instruction:
//
//	MOVQ 0(BP), BP
//
// NOTE(review): presumably emitted after a duffcopy/duffzero call on AMD64 to
// restore the BP value saved beforehand; the use site is not visible here —
// confirm.
var bpduff2 = []byte{
	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
}
4013 // asmevex emits EVEX prefix and opcode byte.
4014 // In addition to asmvex r/m, vvvv and reg fields also requires optional
4015 // K-masking register.
4017 // Expects asmbuf.evex to be properly initialized.
//
// Arguments: rm is the ModR/M r/m operand, v the vvvv operand, r the ModR/M reg
// operand and k the mask register operand. Problems with the instruction
// suffix (zeroing, rounding, broadcast, SAE) are reported through ctxt.Diag.
// The output is four prefix bytes (0x62, P0, P1, P2) followed by evex.opcode.
// NOTE(review): the declarations and initial values of the individual prefix
// bits, and several nil checks, are missing from this extract.
4018 func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
// Register-extension bits for the reg operand. A bit is set to 0 when the
// register carries the matching regrex flag (initial values not visible here;
// presumably 1, i.e. stored inverted — confirm).
4027 if regrex[r.Reg]&Rxr != 0 {
4028 rexR = 0 // "ModR/M.reg" selector 4th bit.
4030 if regrex[r.Reg]&RxrEvex != 0 {
4031 evexR = 0 // "ModR/M.reg" selector 5th bit.
// Extension bits for the r/m operand: the RxrEvex flag of the base register is
// consulted only when there is no index register; otherwise the index
// register's Rxx flag is checked. The base register's Rxb flag is checked
// separately.
4035 if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
4037 } else if regrex[rm.Index]&Rxx != 0 {
4040 if regrex[rm.Reg]&Rxb != 0 {
4044 // P0 = [R][X][B][R'][00][mm]
4054 // 4bit-wide reg index.
// Low three bits come from reg[], the fourth from the Rxr flag shifted left by one.
4055 vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
4058 // P1 = [W][vvvv][1][pp]
4059 p1 := (evex.W() << 7) |
// Suffix handling: the instruction suffix stored in p.Scond selects zeroing,
// rounding, broadcast or SAE; each is validated against what this opcode
// allows and against the kind of the rm operand.
4064 suffix := evexSuffixMap[p.Scond]
4071 if !evex.ZeroingEnabled() {
4072 ctxt.Diag("unsupported zeroing: %v", p)
4075 // When you request zeroing you must specify a mask register.
4077 ctxt.Diag("mask register must be specified for .Z instructions: %v", p)
4078 } else if k.Reg == REG_K0 {
4079 // The mask register must not be K0. That restriction is already
4080 // handled by the Yknot0 restriction in the opcode tables, so we
4081 // won't ever reach here. But put something sensible here just in case.
4082 ctxt.Diag("mask register must not be K0 for .Z instructions: %v", p)
// Embedded rounding is rejected when the r/m operand is memory; the rounding
// value is stored in evexLL.
4087 case suffix.rounding != rcUnset:
4088 if rm != nil && rm.Type == obj.TYPE_MEM {
4089 ctxt.Diag("illegal rounding with memory argument: %v", p)
4090 } else if !evex.RoundingEnabled() {
4091 ctxt.Diag("unsupported rounding: %v", p)
4094 evexLL = suffix.rounding
// Broadcast is the opposite: it requires a memory r/m operand.
4095 case suffix.broadcast:
4096 if rm == nil || rm.Type != obj.TYPE_MEM {
4097 ctxt.Diag("illegal broadcast without memory argument: %v", p)
4098 } else if !evex.BroadcastEnabled() {
4099 ctxt.Diag("unsupported broadcast: %v", p)
// SAE, like rounding, is rejected with a memory r/m operand.
4103 if rm != nil && rm.Type == obj.TYPE_MEM {
4104 ctxt.Diag("illegal SAE with memory argument: %v", p)
4105 } else if !evex.SaeEnabled() {
4106 ctxt.Diag("unsupported SAE: %v", p)
// The V' bit is driven by the index register of rm when it has the RxrEvex
// flag, otherwise by the v operand.
4110 if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
4112 } else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
4113 evexV = 0 // VSR selector 5th bit.
// Mask register number for the aaa field.
4116 evexA = byte(reg[k.Reg])
4118 // P2 = [z][L'L][b][V'][aaa]
4119 p2 := (evexZ << 7) |
4125 const evexEscapeByte = 0x62
4126 ab.Put4(evexEscapeByte, p0, p1, p2)
4127 ab.Put1(evex.opcode)
4130 // Emit VEX prefix and opcode byte.
4131 // The three addresses are the r/m, vvvv, and reg fields.
4132 // The reg and rm arguments appear in the same order as the
4133 // arguments to asmand, which typically follows the call to asmvex.
4134 // The final two arguments are the VEX prefix (see encoding above)
4135 // and the opcode byte.
4136 // For details about vex prefix see:
4137 // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
//
// NOTE(review): the variable declarations, nil checks and the tail of the
// 3-byte emission are missing from this extract.
4138 func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
// Register-extension flags taken from regrex for the reg, base and index registers.
4142 rexR = regrex[r.Reg] & Rxr
4147 rexB = regrex[rm.Reg] & Rxb
4148 rexX = regrex[rm.Index] & Rxx
// Split the packed vex argument: bits 3-5 go to vexM, and the bits selected by
// mask 0x87 (bit 7 and bits 0-2) go to vexWLP.
4150 vexM := (vex >> 3) & 0x7
4151 vexWLP := vex & 0x87
// 4-bit vvvv register number: reg[] supplies the low three bits, the Rxr flag
// shifted left by one supplies the fourth.
4154 vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
// The short form is chosen only when vexM is 1, neither the X nor the B
// extension flag is set, and the vexW1 bit of vex is clear.
4157 if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
4158 // Can use 2-byte encoding.
// XOR with 0x80 flips the R bit after it is shifted into bit 7.
4159 ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
4161 // Must use 3-byte encoding.
// XOR with 0xE0 flips the three combined extension bits in the top of the byte.
4163 (byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
4170 // regIndex returns register index that fits in 5 bits.
4172 // R : 3 bit | legacy instructions | N/A
4173 // [R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
4174 // EVEX.R : 1 bit | EVEX extension bit | RxrEvex
//
// NOTE(review): part of the table above (its header and surrounding lines) is
// missing from this extract.
//
// The result is the bitwise OR of three contributions: the low bits from
// reg[r], the Rxr flag of regrex[r] shifted left by one, and the RxrEvex flag
// of regrex[r] unshifted.
4182 func regIndex(r int16) int {
4183 lower3bits := reg[r]
// In Go, & and << share the same precedence and associate left to right, so
// these parse as (regrex[r] & Rxr) << 1 and (regrex[r] & RxrEvex) << 0.
4184 high4bit := regrex[r] & Rxr << 1
4185 high5bit := regrex[r] & RxrEvex << 0
4186 return lower3bits | high4bit | high5bit
4189 // avx2gatherValid reports whether p satisfies AVX2 gather constraints.
4190 // Reports errors via ctxt.
//
// Operand roles as read by the code: the index register is the Index field of
// p's third operand (p.GetFrom3()), the mask is p.From.Reg and the destination
// is p.To.Reg. All three are normalized with regIndex before comparison.
// p.GetFrom3() is dereferenced without a nil check here.
// NOTE(review): the return statements are missing from this extract.
4191 func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
4192 // If any pair of the index, mask, or destination registers
4193 // are the same, illegal instruction trap (#UD) is triggered.
4194 index := regIndex(p.GetFrom3().Index)
4195 mask := regIndex(p.From.Reg)
4196 dest := regIndex(p.To.Reg)
4197 if dest == mask || dest == index || mask == index {
4198 ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
4205 // avx512gatherValid reports whether p satisfies AVX512 gather constraints.
4206 // Reports errors via ctxt.
//
// Operand roles as read by the code: the index register is p.From.Index and
// the destination is p.To.Reg, both normalized with regIndex.
// NOTE(review): the comparison guarding the diagnostic and the return
// statements are missing from this extract.
4207 func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
4208 // Illegal instruction trap (#UD) is triggered if the destination vector
4209 // register is the same as index vector in VSIB.
4210 index := regIndex(p.From.Index)
4211 dest := regIndex(p.To.Reg)
4213 ctxt.Diag("index and destination registers should be distinct: %v", p)
// doasm encodes the single instruction p into the buffer ab, recording
// relocations on cursym and reporting problems through ctxt.Diag.
//
// NOTE(review): this extract omits many short lines (closing braces, short
// statements, several switch headers and case labels). The comments added
// below describe only what the visible lines establish.
//
// Visible outline:
//   - look up the opcode table entry for p.As and diagnose a missing one;
//   - query prefixof for both operands;
//   - validate gather instructions (AVX2 vs AVX512 chosen by whether the third
//     operand is a K register);
//   - classify the operands and search o.ytab for a matching operand pattern;
//   - on a match, act on the instruction prefix class and then emit bytes
//     according to the matched zcase;
//   - if nothing matched, search the ymovtab special-case table;
//   - on non-AMD64, retry byte instructions with registers exchanged;
//   - otherwise report "invalid instruction".
4220 func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
// Opcode table entry for this instruction, indexed by the opcode masked with obj.AMask.
4221 o := opindex[p.As&obj.AMask]
4224 ctxt.Diag("asmins: missing op %v", p)
// prefixof is consulted for each operand; a non-zero result is acted on in
// lines not visible here.
4228 if pre := prefixof(ctxt, &p.From); pre != 0 {
4231 if pre := prefixof(ctxt, &p.To); pre != 0 {
4235 // Checks to warn about instruction/arguments combinations that
4236 // will unconditionally trigger illegal instruction trap (#UD).
4246 if p.GetFrom3() == nil {
4247 // gathers need a 3rd arg. See issue 58822.
4248 ctxt.Diag("need a third arg for gather instruction: %v", p)
4251 // AVX512 gather requires explicit K mask.
4252 if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
4253 if !avx512gatherValid(ctxt, p) {
4257 if !avx2gatherValid(ctxt, p) {
// Operand classes of From and To are stored on p as Ft and Tt.
4264 p.Ft = uint8(oclass(ctxt, p, &p.From))
4267 p.Tt = uint8(oclass(ctxt, p, &p.To))
// ft and tt are the classes pre-multiplied by Ymax; further down they are
// added to a table class to index ycover.
4270 ft := int(p.Ft) * Ymax
4272 tt := int(p.Tt) * Ymax
// xo is 1 when the first opcode byte is the 0x0f escape, 0 otherwise.
4274 xo := obj.Bool2int(o.op[0] == 0x0f)
// Operand-class list handed to yt.match: From, then every RestArgs entry in
// order, then To. From and To are left out when their class is Ynone.
4284 args := make([]int, 0, argListMax)
4285 if ft != Ynone*Ymax {
4286 args = append(args, ft)
4288 for i := range p.RestArgs {
4289 args = append(args, oclass(ctxt, p, &p.RestArgs[i].Addr)*Ymax)
4291 if tt != Ynone*Ymax {
4292 args = append(args, tt)
// Try each operand pattern in turn. z is advanced past a rejected pattern's
// opcode bytes so that it always indexes the current pattern's bytes in o.op.
4295 for _, yt := range o.ytab {
4296 // ytab matching is purely args-based,
4297 // but AVX512 suffixes like "Z" or "RU_SAE" will
4298 // add EVEX-only filter that will reject non-EVEX matches.
4300 // Consider "VADDPD.BCST 2032(DX), X0, X0".
4301 // Without this rule, operands will lead to VEX-encoded form
4302 // and produce "c5b15813" encoding.
4303 if !yt.match(args) {
4304 // "xo" is always zero for VEX/EVEX encoded insts.
4305 z += int(yt.zoffset) + xo
4307 if p.Scond != 0 && !evexZcase(yt.zcase) {
4308 // Do not signal error and continue to search
4309 // for matching EVEX-encoded form.
4310 z += int(yt.zoffset)
// A pattern matched. The case arms below act on the instruction's prefix class
// (the switch header itself is not visible in this extract).
4315 case Px1: // first option valid only in 32-bit mode
4316 if ctxt.Arch.Family == sys.AMD64 && z == 0 {
4317 z += int(yt.zoffset) + xo
4320 case Pq: // 16 bit escape and opcode escape
4323 case Pq3: // 16 bit escape and opcode escape + REX.W
4327 case Pq4: // 66 0F 38
4328 ab.Put3(0x66, 0x0F, 0x38)
4330 case Pq4w: // 66 0F 38 + REX.W
4332 ab.Put3(0x66, 0x0F, 0x38)
4334 case Pq5: // F3 0F 38
4335 ab.Put3(0xF3, 0x0F, 0x38)
4337 case Pq5w: // F3 0F 38 + REX.W
4339 ab.Put3(0xF3, 0x0F, 0x38)
4341 case Pf2, // xmm opcode escape
4343 ab.Put2(o.prefix, Pm)
4346 ab.Put3(Pe, Pf3, Pm)
4348 case Pfw: // xmm opcode escape + REX.W
4352 case Pm: // opcode escape
4355 case Pe: // 16 bit escape
4358 case Pw: // 64-bit escape
4359 if ctxt.Arch.Family != sys.AMD64 {
4360 ctxt.Diag("asmins: illegal 64: %v", p)
4364 case Pw8: // 64-bit escape if z >= 8
4366 if ctxt.Arch.Family != sys.AMD64 {
4367 ctxt.Diag("asmins: illegal 64: %v", p)
4373 if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
4376 // NOTE(rsc): This is probably safe to do always,
4377 // but when enabled it chooses different encodings
4378 // than the old cmd/internal/obj/i386 code did,
4379 // which breaks our "same bits out" checks.
4380 // In particular, CMPB AX, $0 encodes as 80 f8 00
4381 // in the original obj/i386, and it would encode
4382 // (using a valid, shorter form) as 3c 00 if we enabled
4383 // the call to bytereg here.
4384 if ctxt.Arch.Family == sys.AMD64 {
4385 bytereg(&p.From, &p.Ft)
4386 bytereg(&p.To, &p.Tt)
4389 case P32: // 32 bit but illegal if 64-bit mode
4390 if ctxt.Arch.Family == sys.AMD64 {
4391 ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
4394 case Py: // 64-bit only, no prefix
4395 if ctxt.Arch.Family != sys.AMD64 {
4396 ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
4399 case Py1: // 64-bit only if z < 1, no prefix
4400 if z < 1 && ctxt.Arch.Family != sys.AMD64 {
4401 ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
4404 case Py3: // 64-bit only if z < 3, no prefix
4405 if z < 3 && ctxt.Arch.Family != sys.AMD64 {
4406 ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
// Fatal: z does not index a valid position in the opcode table for this instruction.
4411 log.Fatalf("asmins bad table %v", p)
// Emission by encoding case (yt.zcase). Most case labels are missing from this
// extract; each visible group emits opcode bytes and then the operand bytes.
4422 ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
4429 ab.PutOpBytesLit(z, &o.op)
4432 ab.PutOpBytesLit(z, &o.op)
4433 ab.asmand(ctxt, cursym, p, &p.To, &p.From)
4436 ab.PutOpBytesLit(z, &o.op)
4437 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4440 ab.PutOpBytesLit(z, &o.op)
4441 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
4444 bytereg(&p.From, &p.Ft)
4449 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4453 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
4456 ab.Put2(byte(op), o.op[z+1])
4457 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4460 ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
4461 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4465 ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
4466 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4469 ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
4470 ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
4471 ab.Put1(byte(p.To.Offset))
// Immediate in From with the other two operands in GetFrom3 and To; the two
// zcases differ only in which of those is passed first to asmand. The
// immediate is then written with a width chosen from yt.args[0] and o.prefix.
4473 case Zibm_r, Zibr_m:
4474 ab.PutOpBytesLit(z, &o.op)
4475 if yt.zcase == Zibr_m {
4476 ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
4478 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
4482 ab.Put1(byte(p.From.Offset))
4483 case yt.args[0] == Yi32 && o.prefix == Pe:
4484 ab.PutInt16(int16(p.From.Offset))
4485 case yt.args[0] == Yi32:
4486 ab.PutInt32(int32(p.From.Offset))
4490 ab.Put1(0x8d) // leal
4491 if p.From.Type != obj.TYPE_ADDR {
4492 ctxt.Diag("asmins: Zaut sb type ADDR")
// Temporarily present the address operand as a memory operand for asmand, then restore it.
4494 p.From.Type = obj.TYPE_MEM
4495 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4496 p.From.Type = obj.TYPE_ADDR
4500 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
4504 ab.asmand(ctxt, cursym, p, &p.To, &p.From)
// VEX-encoded forms: asmvex receives (rm, vvvv, reg) operands followed by the
// VEX descriptor o.op[z] and the opcode byte o.op[z+1].
4507 ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
4510 ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
4511 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4514 ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
4515 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
4518 ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
4519 ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
4520 ab.Put1(byte(p.From.Offset))
4523 ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
// regnum is masked with the third operand's register number relative to X0 or
// Y0 (its initial value is not visible here) and OR-ed into o.op[z+2].
4525 if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
4526 regnum &= byte(p.GetFrom3().Reg - REG_X0)
4528 regnum &= byte(p.GetFrom3().Reg - REG_Y0)
4530 ab.Put1(o.op[z+2] | regnum)
4531 ab.Put1(byte(p.From.Offset))
4534 imm, from, from3, to := unpackOps4(p)
4535 ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
4536 ab.asmand(ctxt, cursym, p, from, to)
4537 ab.Put1(byte(imm.Offset))
4540 ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
4541 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
4542 ab.Put1(byte(p.From.Offset))
4545 ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
4546 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
4549 ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
4550 ab.asmand(ctxt, cursym, p, &p.To, &p.From)
4553 ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
4554 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
4557 ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
4558 ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
4559 ab.Put1(byte(p.From.Offset))
// Four-operand VEX form: the first operand's register index is written into
// the high nibble of a trailing byte.
4561 case Zvex_hr_rm_v_r:
4562 hr, from, from3, to := unpackOps4(p)
4563 ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
4564 ab.asmand(ctxt, cursym, p, from, to)
4565 ab.Put1(byte(regIndex(hr.Reg) << 4))
// EVEX-encoded forms: ab.evex is initialized from the opcode table before
// asmevex, which receives (rm, vvvv, reg, mask) operands.
4568 ab.evex = newEVEXBits(z, &o.op)
4569 ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
4570 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))
4573 ab.evex = newEVEXBits(z, &o.op)
4574 ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
4575 ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
4576 ab.Put1(byte(p.From.Offset))
4578 case Zevex_i_rm_k_vo:
4579 imm, from, kmask, to := unpackOps4(p)
4580 ab.evex = newEVEXBits(z, &o.op)
4581 ab.asmevex(ctxt, p, from, to, nil, kmask)
4582 ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
4583 ab.Put1(byte(imm.Offset))
4586 ab.evex = newEVEXBits(z, &o.op)
4587 ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
4588 ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
4589 ab.Put1(byte(p.From.Offset))
4591 case Zevex_i_r_k_rm:
4592 imm, from, kmask, to := unpackOps4(p)
4593 ab.evex = newEVEXBits(z, &o.op)
4594 ab.asmevex(ctxt, p, to, nil, from, kmask)
4595 ab.asmand(ctxt, cursym, p, to, from)
4596 ab.Put1(byte(imm.Offset))
4599 ab.evex = newEVEXBits(z, &o.op)
4600 ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
4601 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
4602 ab.Put1(byte(p.From.Offset))
4604 case Zevex_i_rm_k_r:
4605 imm, from, kmask, to := unpackOps4(p)
4606 ab.evex = newEVEXBits(z, &o.op)
4607 ab.asmevex(ctxt, p, from, nil, to, kmask)
4608 ab.asmand(ctxt, cursym, p, from, to)
4609 ab.Put1(byte(imm.Offset))
4611 case Zevex_i_rm_v_r:
4612 imm, from, from3, to := unpackOps4(p)
4613 ab.evex = newEVEXBits(z, &o.op)
4614 ab.asmevex(ctxt, p, from, from3, to, nil)
4615 ab.asmand(ctxt, cursym, p, from, to)
4616 ab.Put1(byte(imm.Offset))
4618 case Zevex_i_rm_v_k_r:
4619 imm, from, from3, kmask, to := unpackOps5(p)
4620 ab.evex = newEVEXBits(z, &o.op)
4621 ab.asmevex(ctxt, p, from, from3, to, kmask)
4622 ab.asmand(ctxt, cursym, p, from, to)
4623 ab.Put1(byte(imm.Offset))
4626 ab.evex = newEVEXBits(z, &o.op)
4627 ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
4628 ab.asmand(ctxt, cursym, p, &p.To, &p.From)
4631 ab.evex = newEVEXBits(z, &o.op)
4632 ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
4633 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4636 ab.evex = newEVEXBits(z, &o.op)
4637 ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
4638 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4641 ab.evex = newEVEXBits(z, &o.op)
4642 ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
4643 ab.asmand(ctxt, cursym, p, &p.To, &p.From)
4645 case Zevex_rm_v_k_r:
4646 from, from3, kmask, to := unpackOps4(p)
4647 ab.evex = newEVEXBits(z, &o.op)
4648 ab.asmevex(ctxt, p, from, from3, to, kmask)
4649 ab.asmand(ctxt, cursym, p, from, to)
4651 case Zevex_r_v_k_rm:
4652 from, from3, kmask, to := unpackOps4(p)
4653 ab.evex = newEVEXBits(z, &o.op)
4654 ab.asmevex(ctxt, p, to, from3, from, kmask)
4655 ab.asmand(ctxt, cursym, p, to, from)
4658 ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
4659 ab.asmand(ctxt, cursym, p, &p.To, &p.From)
4663 ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
4664 ab.asmand(ctxt, cursym, p, &p.To, &p.From)
4668 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
// A relocation of type R_CALLIND is appended to cursym; its remaining fields
// are set in lines not visible here.
4671 r = obj.Addrel(cursym)
4673 r.Type = objabi.R_CALLIND
4679 ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
4683 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
4684 ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
4688 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
4689 ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
// Here the value returned by mediaop replaces z before o.op[z+1] is read.
4692 z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
4693 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
4694 ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
4697 if yt.zcase == Zib_ {
4703 if p.As == AXABORT {
4706 ab.Put1(byte(vaddr(ctxt, p, a, nil)))
// The destination register number is added into the opcode byte; its
// extension flag is collected in ab.rexflag.
4709 ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
4710 ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
4713 ab.rexflag |= regrex[p.To.Reg] & Rxb
4714 ab.Put1(byte(op + reg[p.To.Reg]))
4716 v = vaddr(ctxt, p, &p.From, nil)
4717 ab.PutInt16(int16(v))
4719 ab.relput4(ctxt, cursym, p, &p.From)
4724 if p.From.Type != obj.TYPE_NONE {
4725 v = vaddr(ctxt, p, &p.From, nil)
4726 ab.PutInt16(int16(v))
// Constant load into a register. rel receives any relocation for the value; l
// is computed in lines not visible here (NOTE(review): presumably the upper 32
// bits of v — confirm).
4730 v = vaddr(ctxt, p, &p.From, &rel)
4732 if l == 0 && rel.Siz != 8 {
// Drops the 0x40 and Rxw bits from rexflag, then emits 0xb8+reg followed by a
// 32-bit value.
4733 ab.rexflag &^= (0x40 | Rxw)
4735 ab.rexflag |= regrex[p.To.Reg] & Rxb
4736 ab.Put1(byte(0xb8 + reg[p.To.Reg]))
4738 r = obj.Addrel(cursym)
4740 r.Off = int32(p.Pc + int64(ab.Len()))
4743 ab.PutInt32(int32(v))
4744 } else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend
4746 ab.asmando(ctxt, cursym, p, &p.To, 0)
4748 ab.PutInt32(int32(v)) // need all 8
4750 ab.rexflag |= regrex[p.To.Reg] & Rxb
4751 ab.Put1(byte(op + reg[p.To.Reg]))
4753 r = obj.Addrel(cursym)
4755 r.Off = int32(p.Pc + int64(ab.Len()))
4763 ab.asmand(ctxt, cursym, p, &p.To, &p.To)
4764 ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
4767 if yt.zcase == Zil_ {
4774 v = vaddr(ctxt, p, a, nil)
4775 ab.PutInt16(int16(v))
4777 ab.relput4(ctxt, cursym, p, a)
4780 case Zm_ilo, Zilo_m:
4782 if yt.zcase == Zilo_m {
4784 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
4787 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
4791 v = vaddr(ctxt, p, a, nil)
4792 ab.PutInt16(int16(v))
4794 ab.relput4(ctxt, cursym, p, a)
4799 ab.asmand(ctxt, cursym, p, &p.To, &p.To)
4801 v = vaddr(ctxt, p, &p.From, nil)
4802 ab.PutInt16(int16(v))
4804 ab.relput4(ctxt, cursym, p, &p.From)
4808 ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
4809 ab.Put1(byte(op + reg[p.To.Reg]))
4812 ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
4813 ab.Put1(byte(op + reg[p.From.Reg]))
// Call/jump to a constant target: a PC-relative relocation is recorded at the
// current output offset.
4815 case Zcallcon, Zjmpcon:
4816 if yt.zcase == Zcallcon {
4821 r = obj.Addrel(cursym)
4822 r.Off = int32(p.Pc + int64(ab.Len()))
4823 r.Type = objabi.R_PCREL
4829 ab.Put2(byte(op), o.op[z+1])
4830 r = obj.Addrel(cursym)
4831 r.Off = int32(p.Pc + int64(ab.Len()))
// The relocation type depends on the architecture: PC-relative on AMD64, absolute otherwise.
4832 if ctxt.Arch.Family == sys.AMD64 {
4833 r.Type = objabi.R_PCREL
4835 r.Type = objabi.R_ADDR
// Direct calls. A call needs a target symbol; a missing one is diagnosed and
// then treated as fatal.
4842 case Zcall, Zcallduff:
4843 if p.To.Sym == nil {
4844 ctxt.Diag("call without target")
4846 log.Fatalf("bad code")
4849 if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
4850 ctxt.Diag("directly calling duff when dynamically linking Go")
4853 if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
4854 // Maintain BP around call, since duffcopy/duffzero can't do it
4855 // (the call jumps into the middle of the function).
4856 // This makes it possible to see call sites for duffcopy/duffzero in
4857 // BP-based profiling tools like Linux perf (which is the
4858 // whole point of maintaining frame pointers in Go).
4864 r = obj.Addrel(cursym)
4865 r.Off = int32(p.Pc + int64(ab.Len()))
4868 r.Type = objabi.R_CALL
4872 if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
4873 // Pop BP pushed above.
4878 // TODO: jump across functions needs reloc
4879 case Zbr, Zjmp, Zloop:
4880 if p.As == AXBEGIN {
// A symbolic target is accepted only for Zjmp; other branch kinds are
// diagnosed and then treated as fatal.
4883 if p.To.Sym != nil {
4884 if yt.zcase != Zjmp {
4885 ctxt.Diag("branch to ATEXT")
4887 log.Fatalf("bad code")
4891 r = obj.Addrel(cursym)
4892 r.Off = int32(p.Pc + int64(ab.Len()))
4894 // Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
4895 // it can point to a trampoline instead of the destination itself.
4896 r.Type = objabi.R_CALL
4902 // Assumes q is in this function.
4903 // TODO: Check in input, preserve in brchain.
4905 // Fill in backward jump now.
4909 ctxt.Diag("jmp/branch/loop without target")
4911 log.Fatalf("bad code")
// Backward branch: the displacement is computed against the end of a two-byte
// instruction. The two-byte form is used when v is at least -128 and the
// instruction is not XBEGIN.
4914 if p.Back&branchBackwards != 0 {
4915 v = q.Pc - (p.Pc + 2)
4916 if v >= -128 && p.As != AXBEGIN {
4920 ab.Put2(byte(op), byte(v))
4921 } else if yt.zcase == Zloop {
4922 ctxt.Diag("loop too far: %v", p)
4925 if p.As == AXBEGIN {
4928 if yt.zcase == Zbr {
4934 ab.PutInt32(int32(v))
4940 // Annotate target; will fill in later.
// Forward branch: a zero placeholder displacement is emitted for the short
// form when the branchShort bit is set on p.Back.
4944 if p.Back&branchShort != 0 && p.As != AXBEGIN {
4948 ab.Put2(byte(op), 0)
4949 } else if yt.zcase == Zloop {
4950 ctxt.Diag("loop too far: %v", p)
4952 if yt.zcase == Zbr {
4960 v = vaddr(ctxt, p, &p.From, &rel)
4963 r = obj.Addrel(cursym)
4965 r.Off = int32(p.Pc + int64(ab.Len()))
// Higher-order parts of v are written in progressively wider pieces.
4970 ab.Put1(byte(v >> 8))
4972 ab.PutInt16(int16(v >> 16))
4974 ab.PutInt32(int32(v >> 32))
// Fallback table search: f3t is the class of the optional third operand,
// scaled by Ymax like ft and tt. The table is walked until its zero sentinel,
// matching the opcode and then all three operand classes through ycover.
4984 if p.GetFrom3() != nil {
4985 f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
4987 for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
4990 if p.As == mo[0].as {
4991 if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
4995 ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
// t holds the matched entry's opcode bytes; this loop runs over them up to the
// first zero byte.
4998 for z = 0; t[z] != 0; z++ {
5004 ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))
5008 ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))
5010 case movRegMem2op: // r,m - 2op
5012 ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
5013 ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
5017 ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
5018 ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
5044 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
// Double-shift entries: the shift count is in From (a constant, or CL/CX), the
// register being shifted in is the third operand, and To is the r/m operand.
5046 case movDoubleShift:
5048 if ctxt.Arch.Family != sys.AMD64 {
5049 ctxt.Diag("asmins: illegal 64: %v", p)
5053 } else if t[0] == Pe {
5058 switch p.From.Type {
5062 case obj.TYPE_CONST:
5064 ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
5065 ab.Put1(byte(p.From.Offset))
5072 case REG_CL, REG_CX:
5074 ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
5078 // NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
5079 // where you load the TLS base register into a register and then index off that
5080 // register to access the actual TLS variables. Systems that allow direct TLS access
5081 // are handled in prefixof above and should not be listed here.
// Only the native-width move is accepted for a TLS load: MOVQ on AMD64, MOVL on 386.
5083 if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
5084 ctxt.Diag("invalid load of TLS: %v", p)
5087 if ctxt.Arch.Family == sys.I386 {
5088 // NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
5089 // where you load the TLS base register into a register and then index off that
5090 // register to access the actual TLS variables. Systems that allow direct TLS access
5091 // are handled in prefixof above and should not be listed here.
5092 switch ctxt.Headtype {
5094 log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
5096 case objabi.Hlinux, objabi.Hfreebsd:
5097 if ctxt.Flag_shared {
5098 // Note that this is not generating the same insns as the other cases.
5101 // call __x86.get_pc_thunk.dst
5102 // movl (gotpc + g@gotntpoff)(dst), dst
5103 // which is encoded as
5104 // call __x86.get_pc_thunk.dst
5106 // and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
5107 // is g, which we can't check here, but will when we assemble the second
5111 r = obj.Addrel(cursym)
5112 r.Off = int32(p.Pc + int64(ab.Len()))
5113 r.Type = objabi.R_CALL
// The thunk symbol name is built from the lower-cased name of the destination register.
5115 r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
// Opcode 0x8B with a ModR/M byte whose top two bits are 2 and whose reg and
// r/m fields are both dst.
5118 ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
5119 r = obj.Addrel(cursym)
5120 r.Off = int32(p.Pc + int64(ab.Len()))
5121 r.Type = objabi.R_TLS_IE
5126 // ELF TLS base is 0(GS).
5129 pp.From.Type = obj.TYPE_MEM
5130 pp.From.Reg = REG_GS
5132 pp.From.Index = REG_NONE
5136 ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
// Here the load goes through the external symbol plan9privates.
5139 pp.From = obj.Addr{}
5140 pp.From.Type = obj.TYPE_MEM
5141 pp.From.Name = obj.NAME_EXTERN
5142 pp.From.Sym = plan9privates
5144 pp.From.Index = REG_NONE
5146 ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
5151 switch ctxt.Headtype {
5153 log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
5155 case objabi.Hlinux, objabi.Hfreebsd:
5156 if !ctxt.Flag_shared {
5157 log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
5159 // Note that this is not generating the same insn as the other cases.
5162 // movq g@gottpoff(%rip), R_to
5163 // which is encoded as
5164 // movq 0(%rip), R_to
5165 // and a R_TLS_IE reloc. This all assumes the only tls variable we access
5166 // is g, which we can't check here, but will when we assemble the second
5168 ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
// Opcode 0x8B with a ModR/M byte of 0x05 plus the destination register in the reg field.
5170 ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
5171 r = obj.Addrel(cursym)
5172 r.Off = int32(p.Pc + int64(ab.Len()))
5173 r.Type = objabi.R_TLS_IE
5179 pp.From = obj.Addr{}
5180 pp.From.Type = obj.TYPE_MEM
5181 pp.From.Name = obj.NAME_EXTERN
5182 pp.From.Sym = plan9privates
5184 pp.From.Index = REG_NONE
5187 ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
5189 case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
5190 // TLS base is 0(FS).
5193 pp.From.Type = obj.TYPE_MEM
5194 pp.From.Name = obj.NAME_NONE
5195 pp.From.Reg = REG_NONE
5197 pp.From.Index = REG_NONE
5202 ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
5212 if ctxt.Arch.Family != sys.AMD64 {
5213 // here, the assembly has failed.
5214 // if it's a byte instruction that has
5215 // unaddressable registers, try to
5216 // exchange registers and reissue the
5217 // instruction with the operands renamed.
// pp is a working copy of the instruction (its creation is not visible here);
// the substitutions below are applied to pp and doasm is re-run on it, with an
// exchange emitted before and after.
5220 unbytereg(&pp.From, &pp.Ft)
5221 unbytereg(&pp.To, &pp.Tt)
// Source operand is a register in the range REG_BP..REG_DI.
5223 z := int(p.From.Reg)
5224 if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
5225 // TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
5226 // For now, different to keep bit-for-bit compatibility.
5227 if ctxt.Arch.Family == sys.I386 {
// Pick a byte-addressable register that the other operand does not use.
5228 breg := byteswapreg(ctxt, &p.To)
5230 ab.Put1(0x87) // xchg lhs,bx
5231 ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
5232 subreg(&pp, z, breg)
5233 ab.doasm(ctxt, cursym, &pp)
5234 ab.Put1(0x87) // xchg lhs,bx
5235 ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
5237 ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax
5238 subreg(&pp, z, REG_AX)
5239 ab.doasm(ctxt, cursym, &pp)
5240 ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax
5245 if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
5246 // We certainly don't want to exchange
5247 // with AX if the op is MUL or DIV.
5248 ab.Put1(0x87) // xchg lhs,bx
5249 ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
5250 subreg(&pp, z, REG_BX)
5251 ab.doasm(ctxt, cursym, &pp)
5252 ab.Put1(0x87) // xchg lhs,bx
5253 ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
5255 ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax
5256 subreg(&pp, z, REG_AX)
5257 ab.doasm(ctxt, cursym, &pp)
5258 ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax
// Same treatment for a destination register in the range REG_BP..REG_DI (z is
// reassigned from the destination in a line not visible here).
5264 if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
5265 // TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
5266 // For now, different to keep bit-for-bit compatibility.
5267 if ctxt.Arch.Family == sys.I386 {
5268 breg := byteswapreg(ctxt, &p.From)
5270 ab.Put1(0x87) // xchg rhs,bx
5271 ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
5272 subreg(&pp, z, breg)
5273 ab.doasm(ctxt, cursym, &pp)
5274 ab.Put1(0x87) // xchg rhs,bx
5275 ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
5277 ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax
5278 subreg(&pp, z, REG_AX)
5279 ab.doasm(ctxt, cursym, &pp)
5280 ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax
5286 ab.Put1(0x87) // xchg rhs,bx
5287 ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
5288 subreg(&pp, z, REG_BX)
5289 ab.doasm(ctxt, cursym, &pp)
5290 ab.Put1(0x87) // xchg rhs,bx
5291 ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
5293 ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax
5294 subreg(&pp, z, REG_AX)
5295 ab.doasm(ctxt, cursym, &pp)
5296 ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax
// Nothing matched and no fallback applied.
5302 ctxt.Diag("%s: invalid instruction: %v", cursym.Name, p)
5305 // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
5306 // which is not referenced in a.
5307 // If a is empty, it returns BX to account for MULB-like instructions
5308 // that might use DX and AX.
//
// The function tracks one availability flag per candidate register (cana,
// canb, canc, cand), all initially true, and clears the flags of registers
// that a refers to. If no register can be chosen it diagnoses "impossible
// byte register" and then terminates via log.Fatalf.
// NOTE(review): the bodies of the case arms and the final selection among the
// flags are missing from this extract.
5309 func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
5310 cana, canb, canc, cand := true, true, true, true
// An empty operand rules out AX and DX.
5311 if a.Type == obj.TYPE_NONE {
5312 cana, cand = false, false
// The base register is examined for register operands, and for memory or
// address operands that have no symbol name.
5315 if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
5318 cana, cand = false, false
5319 case REG_AX, REG_AL, REG_AH:
5321 case REG_BX, REG_BL, REG_BH:
5323 case REG_CX, REG_CL, REG_CH:
5325 case REG_DX, REG_DL, REG_DH:
// Memory and address operands get an additional check (NOTE(review):
// presumably of the index register; the body is not visible here).
5330 if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
5353 ctxt.Diag("impossible byte register")
5355 log.Fatalf("bad code")
// isbadbyte reports whether a is a register operand whose register lies in
// either of two inclusive ranges: REG_BP through REG_DI, or REG_BPB through
// REG_DIB. Operands that are not of type obj.TYPE_REG always yield false.
// In the condition, && binds tighter than ||, so the two range checks are
// OR-ed together before being combined with the type test.
5360 func isbadbyte(a *obj.Addr) bool {
5361 return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
// asmins assembles the instruction p by calling doasm and then post-processes
// the result: it inserts a REX prefix byte when one is required, and adjusts
// the addends of PC-relative relocations created for this instruction.
// NOTE(review): the set-up before the doasm call, the loop bodies' short
// statements and all closing braces are missing from this extract.
5364 func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
5371 ab.doasm(ctxt, cursym, p)
// A REX byte is only relevant when rexflag is non-zero and the instruction was
// not VEX- or EVEX-encoded.
5372 if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
5373 // as befits the whole approach of the architecture,
5374 // the rex prefix must appear before the first opcode byte
5375 // (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
5376 // before the 0f opcode escape!), or it might be ignored.
5377 // note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
5378 if ctxt.Arch.Family != sys.AMD64 {
5379 ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
// Scan forward from mark over legacy prefix bytes (f2, f3, 64-67, 2e, 3e, 26);
// np ends up at the first byte that is not one of them, and the REX byte
// (0x40 OR-ed with rexflag) is inserted there.
5383 for np = mark; np < n; np++ {
5385 if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
5389 ab.Insert(np, byte(0x40|ab.rexflag))
// Walk the symbol's relocations from the newest backwards. The comparison of
// r.Off with p.Pc identifies relocations that precede this instruction
// (NOTE(review): presumably the loop stops there — the body is not visible).
5393 for i := len(cursym.R) - 1; i >= 0; i-- {
5395 if int64(r.Off) < p.Pc {
// Same condition as the REX insertion above (NOTE(review): presumably shifts
// the relocation offset to account for the inserted byte — body not visible).
5398 if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
5401 if r.Type == objabi.R_PCREL {
5402 if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
5403 // PC-relative addressing is relative to the end of the instruction,
5404 // but the relocations applied by the linker are relative to the end
5405 // of the relocation. Because immediate instruction
5406 // arguments can follow the PC-relative memory reference in the
5407 // instruction encoding, the two may not coincide. In this case,
5408 // adjust addend so that linker can keep relocating relative to the
5409 // end of the relocation.
// Subtracts the number of bytes between the end of the relocated field and the
// end of the instruction.
5410 r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
5411 } else if ctxt.Arch.Family == sys.I386 {
5412 // On 386 PC-relative addressing (for non-call/jmp instructions)
5413 // assumes that the previous instruction loaded the PC of the end
5414 // of that instruction into CX, so the adjustment is relative to
// Adds the distance from the start of this instruction to the end of the
// relocated field.
5416 r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
5419 if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
5420 // On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
5421 r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
5427 // unpackOps4 extracts 4 operands from p.
//
// The results are, in order, pointers to p.From, p.RestArgs[0].Addr,
// p.RestArgs[1].Addr and p.To. They point into p itself; nothing is copied.
// p.RestArgs is indexed at 0 and 1 unconditionally, so the caller must
// ensure both entries exist.
5428 func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) {
5429 return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.To
5432 // unpackOps5 extracts 5 operands from p.
//
// The results are, in order, pointers to p.From, p.RestArgs[0].Addr,
// p.RestArgs[1].Addr, p.RestArgs[2].Addr and p.To. They point into p
// itself; nothing is copied. p.RestArgs is indexed at 0, 1 and 2
// unconditionally, so the caller must ensure all three entries exist.
5433 func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) {
5434 return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.RestArgs[2].Addr, &p.To