]> Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/internal/obj/x86/asm6.go
all: fix typos
[gostls13.git] / src / cmd / internal / obj / x86 / asm6.go
1 // Inferno utils/6l/span.c
2 // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c
3 //
4 //      Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
5 //      Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
6 //      Portions Copyright © 1997-1999 Vita Nuova Limited
7 //      Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
8 //      Portions Copyright © 2004,2006 Bruce Ellis
9 //      Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
10 //      Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
11 //      Portions Copyright © 2009 The Go Authors. All rights reserved.
12 //
13 // Permission is hereby granted, free of charge, to any person obtaining a copy
14 // of this software and associated documentation files (the "Software"), to deal
15 // in the Software without restriction, including without limitation the rights
16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 // copies of the Software, and to permit persons to whom the Software is
18 // furnished to do so, subject to the following conditions:
19 //
20 // The above copyright notice and this permission notice shall be included in
21 // all copies or substantial portions of the Software.
22 //
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29 // THE SOFTWARE.
30
31 package x86
32
33 import (
34         "cmd/internal/obj"
35         "cmd/internal/objabi"
36         "cmd/internal/sys"
37         "encoding/binary"
38         "fmt"
39         "internal/buildcfg"
40         "log"
41         "strings"
42 )
43
44 var (
45         plan9privates *obj.LSym
46 )
47
48 // Instruction layout.
49
50 // Loop alignment constants:
51 // we want to align each loop entry to a loopAlign-byte boundary,
52 // and are willing to insert at most maxLoopPad bytes of NOP to do so.
53 // We define a loop entry as the target of a backward jump.
54 //
55 // gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
56 // and it aligns all jump targets, not just backward jump targets.
57 //
58 // As of 6/1/2012, the effect of setting maxLoopPad = 10 here
59 // is very slight but negative, so the alignment is disabled by
60 // setting maxLoopPad = 0. The code is here for reference and
61 // for future experiments.
62 const (
63         loopAlign  = 16 // alignment boundary for loop entries, in bytes
64         maxLoopPad = 0  // maximum NOP padding, in bytes; 0 disables loop alignment
65 )
66
67 // Bit flags that are used to express jump target properties.
68 const (
69         // branchBackwards marks targets that are located behind.
70         // Used to express jumps to loop headers.
71         branchBackwards = (1 << iota)
72         // branchShort marks branches whose target is close,
73         // with an offset in the -128..127 range.
74         branchShort
75         // branchLoopHead marks loop entry.
76         // Used to insert padding for misaligned loops.
77         branchLoopHead
78 )
79
80 // opBytes holds optab encoding bytes.
81 // Each ytab reserves a fixed number of bytes in this array.
82 //
83 // The size should be the minimal number of bytes that
84 // is enough to hold the biggest optab op lines.
85 type opBytes [31]uint8
86
87 type Optab struct {
88         as     obj.As  // instruction this entry describes; optab is scanned for the entry matching p.As
89         ytab   []ytab  // operand forms accepted by the instruction, scanned line by line in doasm
90         prefix uint8   // one of the P* constants; controls the prefix bytes to emit
91         op     opBytes // encoding bytes consumed via the z pointer while scanning ytab
92 }
93
94 type movtab struct { // NOTE(review): no use is visible in this section; field notes below are assumptions to confirm at the use site
95         as   obj.As
96         ft   uint8 // presumably the Ytype of the "from" operand — TODO confirm
97         f3t  uint8 // presumably the Ytype of a third operand — TODO confirm
98         tt   uint8 // presumably the Ytype of the "to" operand — TODO confirm
99         code uint8
100         op   [4]uint8
101 }
102
103 const ( // Ytypes: operand classes computed by oclass and matched against the argList of each ytab line
104         Yxxx = iota
105         Ynone
106         Yi0 // $0
107         Yi1 // $1
108         Yu2 // $x, x fits in uint2
109         Yi8 // $x, x fits in int8
110         Yu8 // $x, x fits in uint8
111         Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
112         Ys32 // signed 32; Yi0, Yi1 and Yi8 all count as Ys32 through ycover
113         Yi32
114         Yi64
115         Yiauto
116         Yal
117         Ycl
118         Yax
119         Ycx
120         Yrb
121         Yrl
122         Yrl32 // Yrl on 32-bit system
123         Yrf
124         Yf0
125         Yrx
126         Ymb
127         Yml
128         Ym
129         Ybr
130         Ycs
131         Yss
132         Yds
133         Yes
134         Yfs
135         Ygs
136         Ygdtr
137         Yidtr
138         Yldtr
139         Ymsw
140         Ytask
141         Ycr0
142         Ycr1
143         Ycr2
144         Ycr3
145         Ycr4
146         Ycr5
147         Ycr6
148         Ycr7
149         Ycr8
150         Ydr0
151         Ydr1
152         Ydr2
153         Ydr3
154         Ydr4
155         Ydr5
156         Ydr6
157         Ydr7
158         Ytr0
159         Ytr1
160         Ytr2
161         Ytr3
162         Ytr4
163         Ytr5
164         Ytr6
165         Ytr7
166         Ymr
167         Ymm
168         Yxr0          // X0 only. "<XMM0>" notation in Intel manual.
169         YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
170         Yxr           // X0..X15
171         YxrEvex       // X0..X31
172         Yxm
173         YxmEvex       // YxrEvex+Ym
174         Yxvm          // VSIB vector array; vm32x/vm64x
175         YxvmEvex      // Yxvm which permits High-16 X register as index.
176         YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
177         Yyr           // Y0..Y15
178         YyrEvex       // Y0..Y31
179         Yym
180         YymEvex   // YyrEvex+Ym
181         Yyvm      // VSIB vector array; vm32y/vm64y
182         YyvmEvex  // Yyvm which permits High-16 Y register as index.
183         YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex
184         Yzr       // Z0..Z31
185         Yzm       // Yzr+Ym
186         Yzvm      // VSIB vector array; vm32z/vm64z
187         Yk0       // K0
188         Yknot0    // K1..K7; write mask
189         Yk        // K0..K7; used for KOP
190         Ykm       // Yk+Ym; used for KOP
191         Ytls
192         Ytextsize
193         Yindir
194         Ymax // number of Ytypes; ycover is a Ymax*Ymax table
195 )
196
197 const ( // Ztypes: first field of each ytab line; select the encoding case in the switch on yt.zcase in doasm
198         Zxxx = iota
199         Zlit
200         Zlitm_r
201         Zlitr_m
202         Zlit_m_r
203         Z_rp
204         Zbr
205         Zcall
206         Zcallcon
207         Zcallduff
208         Zcallind
209         Zcallindreg
210         Zib_
211         Zib_rp
212         Zibo_m // opcode byte (z[0]), then an asmando for the Yml-style operand, then a single immediate byte
213         Zibo_m_xm
214         Zil_
215         Zil_rp
216         Ziq_rp
217         Zilo_m // same as Zibo_m but with a long (32-bit) immediate
218         Zjmp
219         Zjmpcon
220         Zloop
221         Zo_iw
222         Zm_o
223         Zm_r
224         Z_m_r
225         Zm2_r
226         Zm_r_xm
227         Zm_r_i_xm
228         Zm_r_xm_nr
229         Zr_m_xm_nr
230         Zibm_r // mmx1,mmx2/mem64,imm8
231         Zibr_m
232         Zmb_r
233         Zaut_r
234         Zo_m
235         Zo_m64
236         Zpseudo
237         Zr_m
238         Zr_m_xm
239         Zrp_
240         Z_ib
241         Z_il
242         Zm_ibo
243         Zm_ilo
244         Zib_rr
245         Zil_rr
246         Zbyte
247
248         Zvex_rm_v_r
249         Zvex_rm_v_ro
250         Zvex_r_v_rm
251         Zvex_i_rm_vo
252         Zvex_v_rm_r
253         Zvex_i_rm_r
254         Zvex_i_r_v
255         Zvex_i_rm_v_r
256         Zvex
257         Zvex_rm_r_vo
258         Zvex_i_r_rm
259         Zvex_hr_rm_v_r
260
261         Zevex_first // NOTE(review): presumably brackets the Zevex_* range together with Zevex_last — confirm at the use site
262         Zevex_i_r_k_rm
263         Zevex_i_r_rm
264         Zevex_i_rm_k_r
265         Zevex_i_rm_k_vo
266         Zevex_i_rm_r
267         Zevex_i_rm_v_k_r
268         Zevex_i_rm_v_r
269         Zevex_i_rm_vo
270         Zevex_k_rmo
271         Zevex_r_k_rm
272         Zevex_r_v_k_rm
273         Zevex_r_v_rm
274         Zevex_rm_k_r
275         Zevex_rm_v_k_r
276         Zevex_rm_v_r
277         Zevex_last
278
279         Zmax
280 )
281
282 const ( // P* values appear in the prefix field of optab lines and control the prefix bytes to emit
283         Px   = 0
284         Px1  = 1    // symbolic; exact value doesn't matter
285         P32  = 0x32 // 32-bit only
286         Pe   = 0x66 // operand escape
287         Pm   = 0x0f // 2byte opcode escape
288         Pq   = 0xff // both escapes: 66 0f
289         Pb   = 0xfe // byte operands
290         Pf2  = 0xf2 // xmm escape 1: f2 0f
291         Pf3  = 0xf3 // xmm escape 2: f3 0f
292         Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
293         Pq3  = 0x67 // xmm escape 3: 66 48 0f
294         Pq4  = 0x68 // xmm escape 4: 66 0F 38
295         Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38
296         Pq5  = 0x6a // xmm escape 5: F3 0F 38
297         Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38
298         Pfw  = 0xf4 // Pf3 with Rex.w: f3 48 0f
299         Pw   = 0x48 // Rex.w
300         Pw8  = 0x90 // symbolic; exact value doesn't matter
301         Py   = 0x80 // defaults to 64-bit mode
302         Py1  = 0x81 // symbolic; exact value doesn't matter
303         Py3  = 0x83 // symbolic; exact value doesn't matter
304         Pavx = 0x84 // symbolic; exact value doesn't matter
305
306         RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
307         Rxw     = 1 << 3 // =1, 64-bit operand size
308         Rxr     = 1 << 2 // extend modrm reg
309         Rxx     = 1 << 1 // extend sib index
310         Rxb     = 1 << 0 // extend modrm r/m, sib base, or opcode reg
311 )
312
313 const (
314         // Encoding for VEX prefix in tables.
315         // The P, L, and W fields are chosen to match
316         // their eventual locations in the VEX prefix bytes.
317
318         // Bit layout of a table prefix byte, per the constants below:
319         // P in bits 0-1, L in bit 2, M in bits 3-4,
320         // avxEscape in bit 6, W in bit 7.
321
322         // Using spare bit to make leading [E]VEX encoding byte different from
323         // 0x0f even if all other VEX fields are 0.
324         avxEscape = 1 << 6
325
326         // P field - 2 bits
327         vex66 = 1 << 0
328         vexF3 = 2 << 0
329         vexF2 = 3 << 0
330         // L field - 1 bit
331         vexLZ  = 0 << 2
332         vexLIG = 0 << 2
333         vex128 = 0 << 2
334         vex256 = 1 << 2
335         // W field - 1 bit
336         vexWIG = 0 << 7
337         vexW0  = 0 << 7
338         vexW1  = 1 << 7
339         // M field - 5 bits, but mostly reserved; we can store up to 3
340         vex0F   = 1 << 3
341         vex0F38 = 2 << 3
342         vex0F3A = 3 << 3
343 )
344
345 var ycover [Ymax * Ymax]uint8 // ycover[a*Ymax+b] = 1 means Ytype a also counts as the more general Ytype b; set up in instinit
346
347 var reg [MAXREG]int
348
349 var regrex [MAXREG + 1]int
350
351 var ynone = []ytab{
352         {Zlit, 1, argList{}},
353 }
354
355 var ytext = []ytab{
356         {Zpseudo, 0, argList{Ymb, Ytextsize}},
357         {Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
358 }
359
360 var ynop = []ytab{
361         {Zpseudo, 0, argList{}},
362         {Zpseudo, 0, argList{Yiauto}},
363         {Zpseudo, 0, argList{Yml}},
364         {Zpseudo, 0, argList{Yrf}},
365         {Zpseudo, 0, argList{Yxr}},
366         {Zpseudo, 0, argList{Yiauto}}, // NOTE(review): this line and the three below repeat the argLists of the four lines above;
367         {Zpseudo, 0, argList{Yml}},    // if the ytab scan stops at the first matching line they look unreachable —
368         {Zpseudo, 0, argList{Yrf}},    // confirm whether the duplicates are intentional before removing them.
369         {Zpseudo, 1, argList{Yxr}},
370 }
371
372 var yfuncdata = []ytab{
373         {Zpseudo, 0, argList{Yi32, Ym}},
374 }
375
376 var ypcdata = []ytab{
377         {Zpseudo, 0, argList{Yi32, Yi32}},
378 }
379
380 var yxorb = []ytab{
381         {Zib_, 1, argList{Yi32, Yal}},
382         {Zibo_m, 2, argList{Yi32, Ymb}},
383         {Zr_m, 1, argList{Yrb, Ymb}},
384         {Zm_r, 1, argList{Ymb, Yrb}},
385 }
386
387 var yaddl = []ytab{ // each line: Ztype, zoffset (how far z advances past the line), operand Ytypes
388         {Zibo_m, 2, argList{Yi8, Yml}},  // for AADDL: z bytes 0x83, 00
389         {Zil_, 1, argList{Yi32, Yax}},   // for AADDL: z byte 0x05
390         {Zilo_m, 2, argList{Yi32, Yml}}, // for AADDL: z bytes 0x81, 00
391         {Zr_m, 1, argList{Yrl, Yml}},    // for AADDL: z byte 0x01
392         {Zm_r, 1, argList{Yml, Yrl}},    // for AADDL: z byte 0x03
393 }
394
395 var yincl = []ytab{
396         {Z_rp, 1, argList{Yrl}},
397         {Zo_m, 2, argList{Yml}},
398 }
399
400 var yincq = []ytab{
401         {Zo_m, 2, argList{Yml}},
402 }
403
404 var ycmpb = []ytab{
405         {Z_ib, 1, argList{Yal, Yi32}},
406         {Zm_ibo, 2, argList{Ymb, Yi32}},
407         {Zm_r, 1, argList{Ymb, Yrb}},
408         {Zr_m, 1, argList{Yrb, Ymb}},
409 }
410
411 var ycmpl = []ytab{
412         {Zm_ibo, 2, argList{Yml, Yi8}},
413         {Z_il, 1, argList{Yax, Yi32}},
414         {Zm_ilo, 2, argList{Yml, Yi32}},
415         {Zm_r, 1, argList{Yml, Yrl}},
416         {Zr_m, 1, argList{Yrl, Yml}},
417 }
418
419 var yshb = []ytab{
420         {Zo_m, 2, argList{Yi1, Ymb}},
421         {Zibo_m, 2, argList{Yu8, Ymb}},
422         {Zo_m, 2, argList{Ycx, Ymb}},
423 }
424
425 var yshl = []ytab{
426         {Zo_m, 2, argList{Yi1, Yml}},
427         {Zibo_m, 2, argList{Yu8, Yml}},
428         {Zo_m, 2, argList{Ycl, Yml}},
429         {Zo_m, 2, argList{Ycx, Yml}},
430 }
431
432 var ytestl = []ytab{
433         {Zil_, 1, argList{Yi32, Yax}},
434         {Zilo_m, 2, argList{Yi32, Yml}},
435         {Zr_m, 1, argList{Yrl, Yml}},
436         {Zm_r, 1, argList{Yml, Yrl}},
437 }
438
439 var ymovb = []ytab{
440         {Zr_m, 1, argList{Yrb, Ymb}},
441         {Zm_r, 1, argList{Ymb, Yrb}},
442         {Zib_rp, 1, argList{Yi32, Yrb}},
443         {Zibo_m, 2, argList{Yi32, Ymb}},
444 }
445
446 var ybtl = []ytab{
447         {Zibo_m, 2, argList{Yi8, Yml}},
448         {Zr_m, 1, argList{Yrl, Yml}},
449 }
450
451 var ymovw = []ytab{
452         {Zr_m, 1, argList{Yrl, Yml}},
453         {Zm_r, 1, argList{Yml, Yrl}},
454         {Zil_rp, 1, argList{Yi32, Yrl}},
455         {Zilo_m, 2, argList{Yi32, Yml}},
456         {Zaut_r, 2, argList{Yiauto, Yrl}},
457 }
458
459 var ymovl = []ytab{
460         {Zr_m, 1, argList{Yrl, Yml}},
461         {Zm_r, 1, argList{Yml, Yrl}},
462         {Zil_rp, 1, argList{Yi32, Yrl}},
463         {Zilo_m, 2, argList{Yi32, Yml}},
464         {Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
465         {Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
466         {Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
467         {Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
468         {Zaut_r, 2, argList{Yiauto, Yrl}},
469 }
470
471 var yret = []ytab{
472         {Zo_iw, 1, argList{}},
473         {Zo_iw, 1, argList{Yi32}},
474 }
475
476 var ymovq = []ytab{
477         // valid in 32-bit mode
478         {Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
479         {Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
480         {Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
481         {Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
482         {Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
483
484         // valid only in 64-bit mode, usually with 64-bit prefix
485         {Zr_m, 1, argList{Yrl, Yml}},      // 0x89
486         {Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
487         {Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
488         {Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
489         {Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
490         {Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
491         {Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
492         {Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
493         {Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
494         {Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
495 }
496
497 var ymovbe = []ytab{
498         {Zlitm_r, 3, argList{Ym, Yrl}},
499         {Zlitr_m, 3, argList{Yrl, Ym}},
500 }
501
502 var ym_rl = []ytab{
503         {Zm_r, 1, argList{Ym, Yrl}},
504 }
505
506 var yrl_m = []ytab{
507         {Zr_m, 1, argList{Yrl, Ym}},
508 }
509
510 var ymb_rl = []ytab{
511         {Zmb_r, 1, argList{Ymb, Yrl}},
512 }
513
514 var yml_rl = []ytab{
515         {Zm_r, 1, argList{Yml, Yrl}},
516 }
517
518 var yrl_ml = []ytab{
519         {Zr_m, 1, argList{Yrl, Yml}},
520 }
521
522 var yml_mb = []ytab{
523         {Zr_m, 1, argList{Yrb, Ymb}},
524         {Zm_r, 1, argList{Ymb, Yrb}},
525 }
526
527 var yrb_mb = []ytab{
528         {Zr_m, 1, argList{Yrb, Ymb}},
529 }
530
531 var yxchg = []ytab{
532         {Z_rp, 1, argList{Yax, Yrl}},
533         {Zrp_, 1, argList{Yrl, Yax}},
534         {Zr_m, 1, argList{Yrl, Yml}},
535         {Zm_r, 1, argList{Yml, Yrl}},
536 }
537
538 var ydivl = []ytab{
539         {Zm_o, 2, argList{Yml}},
540 }
541
542 var ydivb = []ytab{
543         {Zm_o, 2, argList{Ymb}},
544 }
545
546 var yimul = []ytab{
547         {Zm_o, 2, argList{Yml}},
548         {Zib_rr, 1, argList{Yi8, Yrl}},
549         {Zil_rr, 1, argList{Yi32, Yrl}},
550         {Zm_r, 2, argList{Yml, Yrl}},
551 }
552
553 var yimul3 = []ytab{
554         {Zibm_r, 2, argList{Yi8, Yml, Yrl}},
555         {Zibm_r, 2, argList{Yi32, Yml, Yrl}},
556 }
557
558 var ybyte = []ytab{
559         {Zbyte, 1, argList{Yi64}},
560 }
561
562 var yin = []ytab{
563         {Zib_, 1, argList{Yi32}},
564         {Zlit, 1, argList{}},
565 }
566
567 var yint = []ytab{
568         {Zib_, 1, argList{Yi32}},
569 }
570
571 var ypushl = []ytab{
572         {Zrp_, 1, argList{Yrl}},
573         {Zm_o, 2, argList{Ym}},
574         {Zib_, 1, argList{Yi8}},
575         {Zil_, 1, argList{Yi32}},
576 }
577
578 var ypopl = []ytab{
579         {Z_rp, 1, argList{Yrl}},
580         {Zo_m, 2, argList{Ym}},
581 }
582
583 var ywrfsbase = []ytab{
584         {Zm_o, 2, argList{Yrl}},
585 }
586
587 var yrdrand = []ytab{
588         {Zo_m, 2, argList{Yrl}},
589 }
590
591 var yclflush = []ytab{
592         {Zo_m, 2, argList{Ym}},
593 }
594
595 var ybswap = []ytab{
596         {Z_rp, 2, argList{Yrl}},
597 }
598
599 var yscond = []ytab{
600         {Zo_m, 2, argList{Ymb}},
601 }
602
603 var yjcond = []ytab{
604         {Zbr, 0, argList{Ybr}},
605         {Zbr, 0, argList{Yi0, Ybr}},
606         {Zbr, 1, argList{Yi1, Ybr}},
607 }
608
609 var yloop = []ytab{
610         {Zloop, 1, argList{Ybr}},
611 }
612
613 var ycall = []ytab{
614         {Zcallindreg, 0, argList{Yml}},
615         {Zcallindreg, 2, argList{Yrx, Yrx}},
616         {Zcallind, 2, argList{Yindir}},
617         {Zcall, 0, argList{Ybr}},
618         {Zcallcon, 1, argList{Yi32}},
619 }
620
621 var yduff = []ytab{
622         {Zcallduff, 1, argList{Yi32}},
623 }
624
625 var yjmp = []ytab{
626         {Zo_m64, 2, argList{Yml}},
627         {Zjmp, 0, argList{Ybr}},
628         {Zjmpcon, 1, argList{Yi32}},
629 }
630
631 var yfmvd = []ytab{
632         {Zm_o, 2, argList{Ym, Yf0}},
633         {Zo_m, 2, argList{Yf0, Ym}},
634         {Zm_o, 2, argList{Yrf, Yf0}},
635         {Zo_m, 2, argList{Yf0, Yrf}},
636 }
637
638 var yfmvdp = []ytab{
639         {Zo_m, 2, argList{Yf0, Ym}},
640         {Zo_m, 2, argList{Yf0, Yrf}},
641 }
642
643 var yfmvf = []ytab{
644         {Zm_o, 2, argList{Ym, Yf0}},
645         {Zo_m, 2, argList{Yf0, Ym}},
646 }
647
648 var yfmvx = []ytab{
649         {Zm_o, 2, argList{Ym, Yf0}},
650 }
651
652 var yfmvp = []ytab{
653         {Zo_m, 2, argList{Yf0, Ym}},
654 }
655
656 var yfcmv = []ytab{
657         {Zm_o, 2, argList{Yrf, Yf0}},
658 }
659
660 var yfadd = []ytab{
661         {Zm_o, 2, argList{Ym, Yf0}},
662         {Zm_o, 2, argList{Yrf, Yf0}},
663         {Zo_m, 2, argList{Yf0, Yrf}},
664 }
665
666 var yfxch = []ytab{
667         {Zo_m, 2, argList{Yf0, Yrf}},
668         {Zm_o, 2, argList{Yrf, Yf0}},
669 }
670
671 var ycompp = []ytab{
672         {Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
673 }
674
675 var ystsw = []ytab{
676         {Zo_m, 2, argList{Ym}},
677         {Zlit, 1, argList{Yax}},
678 }
679
680 var ysvrs_mo = []ytab{
681         {Zm_o, 2, argList{Ym}},
682 }
683
684 // unaryDst version of "ysvrs_mo".
685 var ysvrs_om = []ytab{
686         {Zo_m, 2, argList{Ym}},
687 }
688
689 var ymm = []ytab{
690         {Zm_r_xm, 1, argList{Ymm, Ymr}},
691         {Zm_r_xm, 2, argList{Yxm, Yxr}},
692 }
693
694 var yxm = []ytab{
695         {Zm_r_xm, 1, argList{Yxm, Yxr}},
696 }
697
698 var yxm_q4 = []ytab{
699         {Zm_r, 1, argList{Yxm, Yxr}},
700 }
701
702 var yxcvm1 = []ytab{
703         {Zm_r_xm, 2, argList{Yxm, Yxr}},
704         {Zm_r_xm, 2, argList{Yxm, Ymr}},
705 }
706
707 var yxcvm2 = []ytab{
708         {Zm_r_xm, 2, argList{Yxm, Yxr}},
709         {Zm_r_xm, 2, argList{Ymm, Yxr}},
710 }
711
712 var yxr = []ytab{
713         {Zm_r_xm, 1, argList{Yxr, Yxr}},
714 }
715
716 var yxr_ml = []ytab{
717         {Zr_m_xm, 1, argList{Yxr, Yml}},
718 }
719
720 var ymr = []ytab{
721         {Zm_r, 1, argList{Ymr, Ymr}},
722 }
723
724 var ymr_ml = []ytab{
725         {Zr_m_xm, 1, argList{Ymr, Yml}},
726 }
727
728 var yxcmpi = []ytab{
729         {Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
730 }
731
732 var yxmov = []ytab{
733         {Zm_r_xm, 1, argList{Yxm, Yxr}},
734         {Zr_m_xm, 1, argList{Yxr, Yxm}},
735 }
736
737 var yxcvfl = []ytab{
738         {Zm_r_xm, 1, argList{Yxm, Yrl}},
739 }
740
741 var yxcvlf = []ytab{
742         {Zm_r_xm, 1, argList{Yml, Yxr}},
743 }
744
745 var yxcvfq = []ytab{
746         {Zm_r_xm, 2, argList{Yxm, Yrl}},
747 }
748
749 var yxcvqf = []ytab{
750         {Zm_r_xm, 2, argList{Yml, Yxr}},
751 }
752
753 var yps = []ytab{
754         {Zm_r_xm, 1, argList{Ymm, Ymr}},
755         {Zibo_m_xm, 2, argList{Yi8, Ymr}},
756         {Zm_r_xm, 2, argList{Yxm, Yxr}},
757         {Zibo_m_xm, 3, argList{Yi8, Yxr}},
758 }
759
760 var yxrrl = []ytab{
761         {Zm_r, 1, argList{Yxr, Yrl}},
762 }
763
764 var ymrxr = []ytab{
765         {Zm_r, 1, argList{Ymr, Yxr}},
766         {Zm_r_xm, 1, argList{Yxm, Yxr}},
767 }
768
769 var ymshuf = []ytab{
770         {Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
771 }
772
773 var ymshufb = []ytab{
774         {Zm2_r, 2, argList{Yxm, Yxr}},
775 }
776
777 // yxshuf should never have more than 1 entry,
778 // because some optab entries that use it have opcode sequences
779 // that are longer than 2 bytes (zoffset=2 here):
780 // ROUNDPD and ROUNDPS and recently added BLENDPD,
781 // to name a few.
782 var yxshuf = []ytab{
783         {Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
784 }
785
786 var yextrw = []ytab{
787         {Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
788         {Zibr_m, 2, argList{Yu8, Yxr, Yml}},
789 }
790
791 var yextr = []ytab{
792         {Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
793 }
794
795 var yinsrw = []ytab{
796         {Zibm_r, 2, argList{Yu8, Yml, Yxr}},
797 }
798
799 var yinsr = []ytab{
800         {Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
801 }
802
803 var ypsdq = []ytab{
804         {Zibo_m, 2, argList{Yi8, Yxr}},
805 }
806
807 var ymskb = []ytab{
808         {Zm_r_xm, 2, argList{Yxr, Yrl}},
809         {Zm_r_xm, 1, argList{Ymr, Yrl}},
810 }
811
812 var ycrc32l = []ytab{
813         {Zlitm_r, 0, argList{Yml, Yrl}},
814 }
815
816 var ycrc32b = []ytab{
817         {Zlitm_r, 0, argList{Ymb, Yrl}},
818 }
819
820 var yprefetch = []ytab{
821         {Zm_o, 2, argList{Ym}},
822 }
823
824 var yaes = []ytab{
825         {Zlitm_r, 2, argList{Yxm, Yxr}},
826 }
827
828 var yxbegin = []ytab{
829         {Zjmp, 1, argList{Ybr}},
830 }
831
832 var yxabort = []ytab{
833         {Zib_, 1, argList{Yu8}},
834 }
835
836 var ylddqu = []ytab{
837         {Zm_r, 1, argList{Ym, Yxr}},
838 }
839
840 var ypalignr = []ytab{
841         {Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
842 }
843
844 var ysha256rnds2 = []ytab{
845         {Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
846 }
847
848 var yblendvpd = []ytab{
849         {Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
850 }
851
852 var ymmxmm0f38 = []ytab{
853         {Zlitm_r, 3, argList{Ymm, Ymr}},
854         {Zlitm_r, 5, argList{Yxm, Yxr}},
855 }
856
857 var yextractps = []ytab{
858         {Zibr_m, 2, argList{Yu2, Yxr, Yml}},
859 }
860
861 var ysha1rnds4 = []ytab{
862         {Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
863 }
864
865 // You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
866 // ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
867 // to find the entry with the given p.As and then looks through the ytable for
868 // that instruction (the second field in the optab struct) for a line whose
869 // argList matches the Ytypes of the p.From and p.To operands.  The
870 // function oclass computes the specific Ytype of an operand and then the set
871 // of more general Ytypes that it satisfies is implied by the ycover table, set
872 // up in instinit.  For example, oclass distinguishes the constants 0 and 1
873 // from the more general 8-bit constants, but instinit says
874 //
875 //      ycover[Yi0*Ymax+Ys32] = 1
876 //      ycover[Yi1*Ymax+Ys32] = 1
877 //      ycover[Yi8*Ymax+Ys32] = 1
878 //
879 // which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
880 // if that's what an instruction can handle.
881 //
882 // In parallel with the scan through the ytable for the appropriate line, there
883 // is a z pointer that starts out pointing at the strange magic byte list in
884 // the Optab struct.  With each step past a non-matching ytable line, z
885 // advances by the 2nd entry in the line (the zoffset).  When a matching line
886 // is found, that z pointer has the extra data to use in laying down the
887 // instruction bytes.  The actual bytes laid down are a function of the 1st
888 // entry in the line (that is, the Ztype) and the z bytes.
889 //
890 // For example, let's look at AADDL.  The optab line says:
891 //
892 //      {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
893 //
894 // and yaddl says
895 //
896 //      var yaddl = []ytab{
897 //              {Zibo_m, 2, argList{Yi8, Yml}},
898 //              {Zil_, 1, argList{Yi32, Yax}},
899 //              {Zilo_m, 2, argList{Yi32, Yml}},
900 //              {Zr_m, 1, argList{Yrl, Yml}},
901 //              {Zm_r, 1, argList{Yml, Yrl}},
902 //      }
903 //
904 // so there are 5 possible types of ADDL instruction that can be laid down, and
905 // possible states used to lay them down (Ztype and z pointer, assuming z
906 // points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
907 //
908 //      Yi8, Yml -> Zibo_m, z (0x83, 00)
909 //      Yi32, Yax -> Zil_, z+2 (0x05)
910 //      Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
911 //      Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
912 //      Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
913 //
914 // The Pconstant in the optab line controls the prefix bytes to emit.  That's
915 // relatively straightforward as this program goes.
916 //
917 // The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
918 // example, is an opcode byte (z[0]) then an asmando (which is some kind of
919 // encoded addressing mode for the Yml arg), and then a single immediate byte.
920 // Zilo_m is the same but a long (32-bit) immediate.
921 var optab =
922 // as, ytab, andproto, opcode
923 [...]Optab{
924         {obj.AXXX, nil, 0, opBytes{}},
925         {AAAA, ynone, P32, opBytes{0x37}},
926         {AAAD, ynone, P32, opBytes{0xd5, 0x0a}},
927         {AAAM, ynone, P32, opBytes{0xd4, 0x0a}},
928         {AAAS, ynone, P32, opBytes{0x3f}},
929         {AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}},
930         {AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
931         {AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
932         {AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
933         {AADCXL, yml_rl, Pq4, opBytes{0xf6}},
934         {AADCXQ, yml_rl, Pq4w, opBytes{0xf6}},
935         {AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}},
936         {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
937         {AADDPD, yxm, Pq, opBytes{0x58}},
938         {AADDPS, yxm, Pm, opBytes{0x58}},
939         {AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
940         {AADDSD, yxm, Pf2, opBytes{0x58}},
941         {AADDSS, yxm, Pf3, opBytes{0x58}},
942         {AADDSUBPD, yxm, Pq, opBytes{0xd0}},
943         {AADDSUBPS, yxm, Pf2, opBytes{0xd0}},
944         {AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
945         {AADOXL, yml_rl, Pq5, opBytes{0xf6}},
946         {AADOXQ, yml_rl, Pq5w, opBytes{0xf6}},
947         {AADJSP, nil, 0, opBytes{}},
948         {AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}},
949         {AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
950         {AANDNPD, yxm, Pq, opBytes{0x55}},
951         {AANDNPS, yxm, Pm, opBytes{0x55}},
952         {AANDPD, yxm, Pq, opBytes{0x54}},
953         {AANDPS, yxm, Pm, opBytes{0x54}},
954         {AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
955         {AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
956         {AARPL, yrl_ml, P32, opBytes{0x63}},
957         {ABOUNDL, yrl_m, P32, opBytes{0x62}},
958         {ABOUNDW, yrl_m, Pe, opBytes{0x62}},
959         {ABSFL, yml_rl, Pm, opBytes{0xbc}},
960         {ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}},
961         {ABSFW, yml_rl, Pq, opBytes{0xbc}},
962         {ABSRL, yml_rl, Pm, opBytes{0xbd}},
963         {ABSRQ, yml_rl, Pw, opBytes{0x0f, 0xbd}},
964         {ABSRW, yml_rl, Pq, opBytes{0xbd}},
965         {ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}},
966         {ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}},
967         {ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}},
968         {ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}},
969         {ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}},
970         {ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}},
971         {ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}},
972         {ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}},
973         {ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}},
974         {ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}},
975         {ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}},
976         {ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}},
977         {ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}},
978         {ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}},
979         {ABYTE, ybyte, Px, opBytes{1}},
980         {obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}},
981         {ACBW, ynone, Pe, opBytes{0x98}},
982         {ACDQ, ynone, Px, opBytes{0x99}},
983         {ACDQE, ynone, Pw, opBytes{0x98}},
984         {ACLAC, ynone, Pm, opBytes{01, 0xca}},
985         {ACLC, ynone, Px, opBytes{0xf8}},
986         {ACLD, ynone, Px, opBytes{0xfc}},
987         {ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}},
988         {ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}},
989         {ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}},
990         {ACLI, ynone, Px, opBytes{0xfa}},
991         {ACLTS, ynone, Pm, opBytes{0x06}},
992         {ACLWB, yclflush, Pq, opBytes{0xae, 06}},
993         {ACMC, ynone, Px, opBytes{0xf5}},
994         {ACMOVLCC, yml_rl, Pm, opBytes{0x43}},
995         {ACMOVLCS, yml_rl, Pm, opBytes{0x42}},
996         {ACMOVLEQ, yml_rl, Pm, opBytes{0x44}},
997         {ACMOVLGE, yml_rl, Pm, opBytes{0x4d}},
998         {ACMOVLGT, yml_rl, Pm, opBytes{0x4f}},
999         {ACMOVLHI, yml_rl, Pm, opBytes{0x47}},
1000         {ACMOVLLE, yml_rl, Pm, opBytes{0x4e}},
1001         {ACMOVLLS, yml_rl, Pm, opBytes{0x46}},
1002         {ACMOVLLT, yml_rl, Pm, opBytes{0x4c}},
1003         {ACMOVLMI, yml_rl, Pm, opBytes{0x48}},
1004         {ACMOVLNE, yml_rl, Pm, opBytes{0x45}},
1005         {ACMOVLOC, yml_rl, Pm, opBytes{0x41}},
1006         {ACMOVLOS, yml_rl, Pm, opBytes{0x40}},
1007         {ACMOVLPC, yml_rl, Pm, opBytes{0x4b}},
1008         {ACMOVLPL, yml_rl, Pm, opBytes{0x49}},
1009         {ACMOVLPS, yml_rl, Pm, opBytes{0x4a}},
1010         {ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}},
1011         {ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}},
1012         {ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}},
1013         {ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}},
1014         {ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}},
1015         {ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}},
1016         {ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}},
1017         {ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}},
1018         {ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}},
1019         {ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}},
1020         {ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}},
1021         {ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}},
1022         {ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}},
1023         {ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}},
1024         {ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}},
1025         {ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}},
1026         {ACMOVWCC, yml_rl, Pq, opBytes{0x43}},
1027         {ACMOVWCS, yml_rl, Pq, opBytes{0x42}},
1028         {ACMOVWEQ, yml_rl, Pq, opBytes{0x44}},
1029         {ACMOVWGE, yml_rl, Pq, opBytes{0x4d}},
1030         {ACMOVWGT, yml_rl, Pq, opBytes{0x4f}},
1031         {ACMOVWHI, yml_rl, Pq, opBytes{0x47}},
1032         {ACMOVWLE, yml_rl, Pq, opBytes{0x4e}},
1033         {ACMOVWLS, yml_rl, Pq, opBytes{0x46}},
1034         {ACMOVWLT, yml_rl, Pq, opBytes{0x4c}},
1035         {ACMOVWMI, yml_rl, Pq, opBytes{0x48}},
1036         {ACMOVWNE, yml_rl, Pq, opBytes{0x45}},
1037         {ACMOVWOC, yml_rl, Pq, opBytes{0x41}},
1038         {ACMOVWOS, yml_rl, Pq, opBytes{0x40}},
1039         {ACMOVWPC, yml_rl, Pq, opBytes{0x4b}},
1040         {ACMOVWPL, yml_rl, Pq, opBytes{0x49}},
1041         {ACMOVWPS, yml_rl, Pq, opBytes{0x4a}},
1042         {ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}},
1043         {ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
1044         {ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}},
1045         {ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}},
1046         {ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
1047         {ACMPSB, ynone, Pb, opBytes{0xa6}},
1048         {ACMPSD, yxcmpi, Px, opBytes{Pf2, 0xc2}},
1049         {ACMPSL, ynone, Px, opBytes{0xa7}},
1050         {ACMPSQ, ynone, Pw, opBytes{0xa7}},
1051         {ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}},
1052         {ACMPSW, ynone, Pe, opBytes{0xa7}},
1053         {ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
1054         {ACOMISD, yxm, Pe, opBytes{0x2f}},
1055         {ACOMISS, yxm, Pm, opBytes{0x2f}},
1056         {ACPUID, ynone, Pm, opBytes{0xa2}},
1057         {ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}},
1058         {ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}},
1059         {ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}},
1060         {ACVTPD2PS, yxm, Pe, opBytes{0x5a}},
1061         {ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}},
1062         {ACVTPS2PD, yxm, Pm, opBytes{0x5a}},
1063         {ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}},
1064         {ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}},
1065         {ACVTSD2SS, yxm, Pf2, opBytes{0x5a}},
1066         {ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}},
1067         {ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}},
1068         {ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}},
1069         {ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}},
1070         {ACVTSS2SD, yxm, Pf3, opBytes{0x5a}},
1071         {ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}},
1072         {ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}},
1073         {ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}},
1074         {ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}},
1075         {ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}},
1076         {ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}},
1077         {ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}},
1078         {ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}},
1079         {ACWD, ynone, Pe, opBytes{0x99}},
1080         {ACWDE, ynone, Px, opBytes{0x98}},
1081         {ACQO, ynone, Pw, opBytes{0x99}},
1082         {ADAA, ynone, P32, opBytes{0x27}},
1083         {ADAS, ynone, P32, opBytes{0x2f}},
1084         {ADECB, yscond, Pb, opBytes{0xfe, 01}},
1085         {ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}},
1086         {ADECQ, yincq, Pw, opBytes{0xff, 01}},
1087         {ADECW, yincq, Pe, opBytes{0xff, 01}},
1088         {ADIVB, ydivb, Pb, opBytes{0xf6, 06}},
1089         {ADIVL, ydivl, Px, opBytes{0xf7, 06}},
1090         {ADIVPD, yxm, Pe, opBytes{0x5e}},
1091         {ADIVPS, yxm, Pm, opBytes{0x5e}},
1092         {ADIVQ, ydivl, Pw, opBytes{0xf7, 06}},
1093         {ADIVSD, yxm, Pf2, opBytes{0x5e}},
1094         {ADIVSS, yxm, Pf3, opBytes{0x5e}},
1095         {ADIVW, ydivl, Pe, opBytes{0xf7, 06}},
1096         {ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}},
1097         {ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}},
1098         {AEMMS, ynone, Pm, opBytes{0x77}},
1099         {AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}},
1100         {AENTER, nil, 0, opBytes{}}, // botch
1101         {AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}},
1102         {AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}},
1103         {AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
1104         {AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
1105         {AHLT, ynone, Px, opBytes{0xf4}},
1106         {AIDIVB, ydivb, Pb, opBytes{0xf6, 07}},
1107         {AIDIVL, ydivl, Px, opBytes{0xf7, 07}},
1108         {AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}},
1109         {AIDIVW, ydivl, Pe, opBytes{0xf7, 07}},
1110         {AIMULB, ydivb, Pb, opBytes{0xf6, 05}},
1111         {AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
1112         {AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
1113         {AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
1114         {AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}},
1115         {AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}},
1116         {AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}},
1117         {AINB, yin, Pb, opBytes{0xe4, 0xec}},
1118         {AINW, yin, Pe, opBytes{0xe5, 0xed}},
1119         {AINL, yin, Px, opBytes{0xe5, 0xed}},
1120         {AINCB, yscond, Pb, opBytes{0xfe, 00}},
1121         {AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}},
1122         {AINCQ, yincq, Pw, opBytes{0xff, 00}},
1123         {AINCW, yincq, Pe, opBytes{0xff, 00}},
1124         {AINSB, ynone, Pb, opBytes{0x6c}},
1125         {AINSL, ynone, Px, opBytes{0x6d}},
1126         {AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}},
1127         {AINSW, ynone, Pe, opBytes{0x6d}},
1128         {AICEBP, ynone, Px, opBytes{0xf1}},
1129         {AINT, yint, Px, opBytes{0xcd}},
1130         {AINTO, ynone, P32, opBytes{0xce}},
1131         {AIRETL, ynone, Px, opBytes{0xcf}},
1132         {AIRETQ, ynone, Pw, opBytes{0xcf}},
1133         {AIRETW, ynone, Pe, opBytes{0xcf}},
1134         {AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}},
1135         {AJCS, yjcond, Px, opBytes{0x72, 0x82}},
1136         {AJCXZL, yloop, Px, opBytes{0xe3}},
1137         {AJCXZW, yloop, Px, opBytes{0xe3}},
1138         {AJCXZQ, yloop, Px, opBytes{0xe3}},
1139         {AJEQ, yjcond, Px, opBytes{0x74, 0x84}},
1140         {AJGE, yjcond, Px, opBytes{0x7d, 0x8d}},
1141         {AJGT, yjcond, Px, opBytes{0x7f, 0x8f}},
1142         {AJHI, yjcond, Px, opBytes{0x77, 0x87}},
1143         {AJLE, yjcond, Px, opBytes{0x7e, 0x8e}},
1144         {AJLS, yjcond, Px, opBytes{0x76, 0x86}},
1145         {AJLT, yjcond, Px, opBytes{0x7c, 0x8c}},
1146         {AJMI, yjcond, Px, opBytes{0x78, 0x88}},
1147         {obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}},
1148         {AJNE, yjcond, Px, opBytes{0x75, 0x85}},
1149         {AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}},
1150         {AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}},
1151         {AJPC, yjcond, Px, opBytes{0x7b, 0x8b}},
1152         {AJPL, yjcond, Px, opBytes{0x79, 0x89}},
1153         {AJPS, yjcond, Px, opBytes{0x7a, 0x8a}},
1154         {AHADDPD, yxm, Pq, opBytes{0x7c}},
1155         {AHADDPS, yxm, Pf2, opBytes{0x7c}},
1156         {AHSUBPD, yxm, Pq, opBytes{0x7d}},
1157         {AHSUBPS, yxm, Pf2, opBytes{0x7d}},
1158         {ALAHF, ynone, Px, opBytes{0x9f}},
1159         {ALARL, yml_rl, Pm, opBytes{0x02}},
1160         {ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}},
1161         {ALARW, yml_rl, Pq, opBytes{0x02}},
1162         {ALDDQU, ylddqu, Pf2, opBytes{0xf0}},
1163         {ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}},
1164         {ALEAL, ym_rl, Px, opBytes{0x8d}},
1165         {ALEAQ, ym_rl, Pw, opBytes{0x8d}},
1166         {ALEAVEL, ynone, P32, opBytes{0xc9}},
1167         {ALEAVEQ, ynone, Py, opBytes{0xc9}},
1168         {ALEAVEW, ynone, Pe, opBytes{0xc9}},
1169         {ALEAW, ym_rl, Pe, opBytes{0x8d}},
1170         {ALOCK, ynone, Px, opBytes{0xf0}},
1171         {ALODSB, ynone, Pb, opBytes{0xac}},
1172         {ALODSL, ynone, Px, opBytes{0xad}},
1173         {ALODSQ, ynone, Pw, opBytes{0xad}},
1174         {ALODSW, ynone, Pe, opBytes{0xad}},
1175         {ALONG, ybyte, Px, opBytes{4}},
1176         {ALOOP, yloop, Px, opBytes{0xe2}},
1177         {ALOOPEQ, yloop, Px, opBytes{0xe1}},
1178         {ALOOPNE, yloop, Px, opBytes{0xe0}},
1179         {ALTR, ydivl, Pm, opBytes{0x00, 03}},
1180         {ALZCNTL, yml_rl, Pf3, opBytes{0xbd}},
1181         {ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}},
1182         {ALZCNTW, yml_rl, Pef3, opBytes{0xbd}},
1183         {ALSLL, yml_rl, Pm, opBytes{0x03}},
1184         {ALSLW, yml_rl, Pq, opBytes{0x03}},
1185         {ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}},
1186         {AMASKMOVOU, yxr, Pe, opBytes{0xf7}},
1187         {AMASKMOVQ, ymr, Pm, opBytes{0xf7}},
1188         {AMAXPD, yxm, Pe, opBytes{0x5f}},
1189         {AMAXPS, yxm, Pm, opBytes{0x5f}},
1190         {AMAXSD, yxm, Pf2, opBytes{0x5f}},
1191         {AMAXSS, yxm, Pf3, opBytes{0x5f}},
1192         {AMINPD, yxm, Pe, opBytes{0x5d}},
1193         {AMINPS, yxm, Pm, opBytes{0x5d}},
1194         {AMINSD, yxm, Pf2, opBytes{0x5d}},
1195         {AMINSS, yxm, Pf3, opBytes{0x5d}},
1196         {AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}},
1197         {AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}},
1198         {AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}},
1199         {AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}},
1200         {AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}},
1201         {AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}},
1202         {AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}},
1203         {AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}},
1204         {AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}},
1205         {AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}},
1206         {AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}},
1207         {AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}},
1208         {AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}},
1209         {AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}},
1210         {AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}},
1211         {AMOVHLPS, yxr, Pm, opBytes{0x12}},
1212         {AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}},
1213         {AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}},
1214         {AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
1215         {AMOVLHPS, yxr, Pm, opBytes{0x16}},
1216         {AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}},
1217         {AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}},
1218         {AMOVLQSX, yml_rl, Pw, opBytes{0x63}},
1219         {AMOVLQZX, yml_rl, Px, opBytes{0x8b}},
1220         {AMOVMSKPD, yxrrl, Pq, opBytes{0x50}},
1221         {AMOVMSKPS, yxrrl, Pm, opBytes{0x50}},
1222         {AMOVNTO, yxr_ml, Pe, opBytes{0xe7}},
1223         {AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}},
1224         {AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}},
1225         {AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}},
1226         {AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}},
1227         {AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
1228         {AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}},
1229         {AMOVSB, ynone, Pb, opBytes{0xa4}},
1230         {AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}},
1231         {AMOVSL, ynone, Px, opBytes{0xa5}},
1232         {AMOVSQ, ynone, Pw, opBytes{0xa5}},
1233         {AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}},
1234         {AMOVSW, ynone, Pe, opBytes{0xa5}},
1235         {AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}},
1236         {AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}},
1237         {AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
1238         {AMOVWLSX, yml_rl, Pm, opBytes{0xbf}},
1239         {AMOVWLZX, yml_rl, Pm, opBytes{0xb7}},
1240         {AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}},
1241         {AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}},
1242         {AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}},
1243         {AMULB, ydivb, Pb, opBytes{0xf6, 04}},
1244         {AMULL, ydivl, Px, opBytes{0xf7, 04}},
1245         {AMULPD, yxm, Pe, opBytes{0x59}},
1246         {AMULPS, yxm, Ym, opBytes{0x59}},
1247         {AMULQ, ydivl, Pw, opBytes{0xf7, 04}},
1248         {AMULSD, yxm, Pf2, opBytes{0x59}},
1249         {AMULSS, yxm, Pf3, opBytes{0x59}},
1250         {AMULW, ydivl, Pe, opBytes{0xf7, 04}},
1251         {ANEGB, yscond, Pb, opBytes{0xf6, 03}},
1252         {ANEGL, yscond, Px, opBytes{0xf7, 03}},
1253         {ANEGQ, yscond, Pw, opBytes{0xf7, 03}},
1254         {ANEGW, yscond, Pe, opBytes{0xf7, 03}},
1255         {obj.ANOP, ynop, Px, opBytes{0, 0}},
1256         {ANOTB, yscond, Pb, opBytes{0xf6, 02}},
1257         {ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
1258         {ANOTQ, yscond, Pw, opBytes{0xf7, 02}},
1259         {ANOTW, yscond, Pe, opBytes{0xf7, 02}},
1260         {AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}},
1261         {AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
1262         {AORPD, yxm, Pq, opBytes{0x56}},
1263         {AORPS, yxm, Pm, opBytes{0x56}},
1264         {AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
1265         {AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
1266         {AOUTB, yin, Pb, opBytes{0xe6, 0xee}},
1267         {AOUTL, yin, Px, opBytes{0xe7, 0xef}},
1268         {AOUTW, yin, Pe, opBytes{0xe7, 0xef}},
1269         {AOUTSB, ynone, Pb, opBytes{0x6e}},
1270         {AOUTSL, ynone, Px, opBytes{0x6f}},
1271         {AOUTSW, ynone, Pe, opBytes{0x6f}},
1272         {APABSB, yxm_q4, Pq4, opBytes{0x1c}},
1273         {APABSD, yxm_q4, Pq4, opBytes{0x1e}},
1274         {APABSW, yxm_q4, Pq4, opBytes{0x1d}},
1275         {APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}},
1276         {APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}},
1277         {APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}},
1278         {APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}},
1279         {APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}},
1280         {APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}},
1281         {APADDQ, yxm, Pe, opBytes{0xd4}},
1282         {APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}},
1283         {APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}},
1284         {APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}},
1285         {APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}},
1286         {APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}},
1287         {APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}},
1288         {APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}},
1289         {APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}},
1290         {APAUSE, ynone, Px, opBytes{0xf3, 0x90}},
1291         {APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}},
1292         {APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}},
1293         {APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}},
1294         {APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}},
1295         {APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}},
1296         {APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}},
1297         {APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}},
1298         {APCMPGTB, ymm, Py1, opBytes{0x64, Pe, 0x64}},
1299         {APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}},
1300         {APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}},
1301         {APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}},
1302         {APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}},
1303         {APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}},
1304         {APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}},
1305         {APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}},
1306         {APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}},
1307         {APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}},
1308         {APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
1309         {APHADDSW, yxm_q4, Pq4, opBytes{0x03}},
1310         {APHADDW, yxm_q4, Pq4, opBytes{0x01}},
1311         {APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}},
1312         {APHSUBD, yxm_q4, Pq4, opBytes{0x06}},
1313         {APHSUBSW, yxm_q4, Pq4, opBytes{0x07}},
1314         {APHSUBW, yxm_q4, Pq4, opBytes{0x05}},
1315         {APINSRW, yinsrw, Pq, opBytes{0xc4, 00}},
1316         {APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}},
1317         {APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}},
1318         {APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}},
1319         {APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}},
1320         {APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}},
1321         {APMAXSB, yxm_q4, Pq4, opBytes{0x3c}},
1322         {APMAXSD, yxm_q4, Pq4, opBytes{0x3d}},
1323         {APMAXSW, yxm, Pe, opBytes{0xee}},
1324         {APMAXUB, yxm, Pe, opBytes{0xde}},
1325         {APMAXUD, yxm_q4, Pq4, opBytes{0x3f}},
1326         {APMAXUW, yxm_q4, Pq4, opBytes{0x3e}},
1327         {APMINSB, yxm_q4, Pq4, opBytes{0x38}},
1328         {APMINSD, yxm_q4, Pq4, opBytes{0x39}},
1329         {APMINSW, yxm, Pe, opBytes{0xea}},
1330         {APMINUB, yxm, Pe, opBytes{0xda}},
1331         {APMINUD, yxm_q4, Pq4, opBytes{0x3b}},
1332         {APMINUW, yxm_q4, Pq4, opBytes{0x3a}},
1333         {APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}},
1334         {APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}},
1335         {APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}},
1336         {APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}},
1337         {APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}},
1338         {APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}},
1339         {APMOVSXWQ, yxm_q4, Pq4, opBytes{0x24}},
1340         {APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}},
1341         {APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}},
1342         {APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}},
1343         {APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}},
1344         {APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}},
1345         {APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}},
1346         {APMULDQ, yxm_q4, Pq4, opBytes{0x28}},
1347         {APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}},
1348         {APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}},
1349         {APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}},
1350         {APMULLD, yxm_q4, Pq4, opBytes{0x40}},
1351         {APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}},
1352         {APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}},
1353         {APOPAL, ynone, P32, opBytes{0x61}},
1354         {APOPAW, ynone, Pe, opBytes{0x61}},
1355         {APOPCNTW, yml_rl, Pef3, opBytes{0xb8}},
1356         {APOPCNTL, yml_rl, Pf3, opBytes{0xb8}},
1357         {APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}},
1358         {APOPFL, ynone, P32, opBytes{0x9d}},
1359         {APOPFQ, ynone, Py, opBytes{0x9d}},
1360         {APOPFW, ynone, Pe, opBytes{0x9d}},
1361         {APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}},
1362         {APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}},
1363         {APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}},
1364         {APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}},
1365         {APSADBW, yxm, Pq, opBytes{0xf6}},
1366         {APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}},
1367         {APSHUFL, yxshuf, Pq, opBytes{0x70, 00}},
1368         {APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}},
1369         {APSHUFW, ymshuf, Pm, opBytes{0x70, 00}},
1370         {APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}},
1371         {APSIGNB, yxm_q4, Pq4, opBytes{0x08}},
1372         {APSIGND, yxm_q4, Pq4, opBytes{0x0a}},
1373         {APSIGNW, yxm_q4, Pq4, opBytes{0x09}},
1374         {APSLLO, ypsdq, Pq, opBytes{0x73, 07}},
1375         {APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
1376         {APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
1377         {APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
1378         {APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
1379         {APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
1380         {APSRLO, ypsdq, Pq, opBytes{0x73, 03}},
1381         {APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
1382         {APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
1383         {APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
1384         {APSUBB, yxm, Pe, opBytes{0xf8}},
1385         {APSUBL, yxm, Pe, opBytes{0xfa}},
1386         {APSUBQ, yxm, Pe, opBytes{0xfb}},
1387         {APSUBSB, yxm, Pe, opBytes{0xe8}},
1388         {APSUBSW, yxm, Pe, opBytes{0xe9}},
1389         {APSUBUSB, yxm, Pe, opBytes{0xd8}},
1390         {APSUBUSW, yxm, Pe, opBytes{0xd9}},
1391         {APSUBW, yxm, Pe, opBytes{0xf9}},
1392         {APTEST, yxm_q4, Pq4, opBytes{0x17}},
1393         {APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}},
1394         {APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}},
1395         {APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}},
1396         {APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}},
1397         {APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}},
1398         {APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}},
1399         {APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}},
1400         {APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}},
1401         {APUSHAL, ynone, P32, opBytes{0x60}},
1402         {APUSHAW, ynone, Pe, opBytes{0x60}},
1403         {APUSHFL, ynone, P32, opBytes{0x9c}},
1404         {APUSHFQ, ynone, Py, opBytes{0x9c}},
1405         {APUSHFW, ynone, Pe, opBytes{0x9c}},
1406         {APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
1407         {APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
1408         {APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
1409         {APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}},
1410         {AQUAD, ybyte, Px, opBytes{8}},
1411         {ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}},
1412         {ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
1413         {ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
1414         {ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
1415         {ARCPPS, yxm, Pm, opBytes{0x53}},
1416         {ARCPSS, yxm, Pf3, opBytes{0x53}},
1417         {ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}},
1418         {ARCRL, yshl, Px, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
1419         {ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
1420         {ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
1421         {AREP, ynone, Px, opBytes{0xf3}},
1422         {AREPN, ynone, Px, opBytes{0xf2}},
1423         {obj.ARET, ynone, Px, opBytes{0xc3}},
1424         {ARETFW, yret, Pe, opBytes{0xcb, 0xca}},
1425         {ARETFL, yret, Px, opBytes{0xcb, 0xca}},
1426         {ARETFQ, yret, Pw, opBytes{0xcb, 0xca}},
1427         {AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}},
1428         {AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
1429         {AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
1430         {AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
1431         {ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}},
1432         {ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
1433         {ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
1434         {ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
1435         {ARSQRTPS, yxm, Pm, opBytes{0x52}},
1436         {ARSQRTSS, yxm, Pf3, opBytes{0x52}},
1437         {ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL
1438         {ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
1439         {ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1440         {ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1441         {ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1442         {ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}},
1443         {ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
1444         {ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
1445         {ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
1446         {ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}},
1447         {ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
1448         {ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
1449         {ASBBW, yaddl, Pe, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
1450         {ASCASB, ynone, Pb, opBytes{0xae}},
1451         {ASCASL, ynone, Px, opBytes{0xaf}},
1452         {ASCASQ, ynone, Pw, opBytes{0xaf}},
1453         {ASCASW, ynone, Pe, opBytes{0xaf}},
1454         {ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}},
1455         {ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}},
1456         {ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}},
1457         {ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}},
1458         {ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}},
1459         {ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}},
1460         {ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}},
1461         {ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}},
1462         {ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}},
1463         {ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}},
1464         {ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}},
1465         {ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}},
1466         {ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}},
1467         {ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}},
1468         {ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}},
1469         {ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}},
1470         {ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
1471         {ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1472         {ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1473         {ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1474         {ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}},
1475         {ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
1476         {ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
1477         {ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
1478         {ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}},
1479         {ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}},
1480         {ASQRTPD, yxm, Pe, opBytes{0x51}},
1481         {ASQRTPS, yxm, Pm, opBytes{0x51}},
1482         {ASQRTSD, yxm, Pf2, opBytes{0x51}},
1483         {ASQRTSS, yxm, Pf3, opBytes{0x51}},
1484         {ASTC, ynone, Px, opBytes{0xf9}},
1485         {ASTD, ynone, Px, opBytes{0xfd}},
1486         {ASTI, ynone, Px, opBytes{0xfb}},
1487         {ASTMXCSR, ysvrs_om, Pm, opBytes{0xae, 03, 0xae, 03}},
1488         {ASTOSB, ynone, Pb, opBytes{0xaa}},
1489         {ASTOSL, ynone, Px, opBytes{0xab}},
1490         {ASTOSQ, ynone, Pw, opBytes{0xab}},
1491         {ASTOSW, ynone, Pe, opBytes{0xab}},
1492         {ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}},
1493         {ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
1494         {ASUBPD, yxm, Pe, opBytes{0x5c}},
1495         {ASUBPS, yxm, Pm, opBytes{0x5c}},
1496         {ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
1497         {ASUBSD, yxm, Pf2, opBytes{0x5c}},
1498         {ASUBSS, yxm, Pf3, opBytes{0x5c}},
1499         {ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
1500         {ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}},
1501         {ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall
1502         {ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}},
1503         {ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
1504         {ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
1505         {ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
1506         {ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}},
1507         {obj.ATEXT, ytext, Px, opBytes{}},
1508         {AUCOMISD, yxm, Pe, opBytes{0x2e}},
1509         {AUCOMISS, yxm, Pm, opBytes{0x2e}},
1510         {AUNPCKHPD, yxm, Pe, opBytes{0x15}},
1511         {AUNPCKHPS, yxm, Pm, opBytes{0x15}},
1512         {AUNPCKLPD, yxm, Pe, opBytes{0x14}},
1513         {AUNPCKLPS, yxm, Pm, opBytes{0x14}},
1514         {AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}},
1515         {AVERR, ydivl, Pm, opBytes{0x00, 04}},
1516         {AVERW, ydivl, Pm, opBytes{0x00, 05}},
1517         {AWAIT, ynone, Px, opBytes{0x9b}},
1518         {AWORD, ybyte, Px, opBytes{2}},
1519         {AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}},
1520         {AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}},
1521         {AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}},
1522         {AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}},
1523         {AXLAT, ynone, Px, opBytes{0xd7}},
1524         {AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}},
1525         {AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
1526         {AXORPD, yxm, Pe, opBytes{0x57}},
1527         {AXORPS, yxm, Pm, opBytes{0x57}},
1528         {AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
1529         {AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
1530         {AFMOVB, yfmvx, Px, opBytes{0xdf, 04}},
1531         {AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}},
1532         {AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
1533         {AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}},
1534         {AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}},
1535         {AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}},
1536         {AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}},
1537         {AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}},
1538         {AFMOVV, yfmvx, Px, opBytes{0xdf, 05}},
1539         {AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}},
1540         {AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}},
1541         {AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}},
1542         {AFMOVX, yfmvx, Px, opBytes{0xdb, 05}},
1543         {AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}},
1544         {AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}},
1545         {AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}},
1546         {AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}},
1547         {AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}},
1548         {AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}},
1549         {AFCMOVB, yfcmv, Px, opBytes{0xda, 00}},
1550         {AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}},
1551         {AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}},
1552         {AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}},
1553         {AFCMOVE, yfcmv, Px, opBytes{0xda, 01}},
1554         {AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}},
1555         {AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}},
1556         {AFCMOVU, yfcmv, Px, opBytes{0xda, 03}},
1557         {AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}},
1558         {AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}},  // botch
1559         {AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch
1560         {AFCOMDPP, ycompp, Px, opBytes{0xde, 03}},
1561         {AFCOMF, yfmvx, Px, opBytes{0xd8, 02}},
1562         {AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}},
1563         {AFCOMI, yfcmv, Px, opBytes{0xdb, 06}},
1564         {AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}},
1565         {AFCOML, yfmvx, Px, opBytes{0xda, 02}},
1566         {AFCOMLP, yfmvx, Px, opBytes{0xda, 03}},
1567         {AFCOMW, yfmvx, Px, opBytes{0xde, 02}},
1568         {AFCOMWP, yfmvx, Px, opBytes{0xde, 03}},
1569         {AFUCOM, ycompp, Px, opBytes{0xdd, 04}},
1570         {AFUCOMI, ycompp, Px, opBytes{0xdb, 05}},
1571         {AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}},
1572         {AFUCOMP, ycompp, Px, opBytes{0xdd, 05}},
1573         {AFUCOMPP, ycompp, Px, opBytes{0xda, 13}},
1574         {AFADDDP, ycompp, Px, opBytes{0xde, 00}},
1575         {AFADDW, yfmvx, Px, opBytes{0xde, 00}},
1576         {AFADDL, yfmvx, Px, opBytes{0xda, 00}},
1577         {AFADDF, yfmvx, Px, opBytes{0xd8, 00}},
1578         {AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}},
1579         {AFMULDP, ycompp, Px, opBytes{0xde, 01}},
1580         {AFMULW, yfmvx, Px, opBytes{0xde, 01}},
1581         {AFMULL, yfmvx, Px, opBytes{0xda, 01}},
1582         {AFMULF, yfmvx, Px, opBytes{0xd8, 01}},
1583         {AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}},
1584         {AFSUBDP, ycompp, Px, opBytes{0xde, 05}},
1585         {AFSUBW, yfmvx, Px, opBytes{0xde, 04}},
1586         {AFSUBL, yfmvx, Px, opBytes{0xda, 04}},
1587         {AFSUBF, yfmvx, Px, opBytes{0xd8, 04}},
1588         {AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}},
1589         {AFSUBRDP, ycompp, Px, opBytes{0xde, 04}},
1590         {AFSUBRW, yfmvx, Px, opBytes{0xde, 05}},
1591         {AFSUBRL, yfmvx, Px, opBytes{0xda, 05}},
1592         {AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}},
1593         {AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}},
1594         {AFDIVDP, ycompp, Px, opBytes{0xde, 07}},
1595         {AFDIVW, yfmvx, Px, opBytes{0xde, 06}},
1596         {AFDIVL, yfmvx, Px, opBytes{0xda, 06}},
1597         {AFDIVF, yfmvx, Px, opBytes{0xd8, 06}},
1598         {AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}},
1599         {AFDIVRDP, ycompp, Px, opBytes{0xde, 06}},
1600         {AFDIVRW, yfmvx, Px, opBytes{0xde, 07}},
1601         {AFDIVRL, yfmvx, Px, opBytes{0xda, 07}},
1602         {AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}},
1603         {AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}},
1604         {AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}},
1605         {AFFREE, nil, 0, opBytes{}},
1606         {AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}},
1607         {AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}},
1608         {AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}},
1609         {AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}},
1610         {AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}},
1611         {AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}},
1612         {AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}},
1613         {AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}},
1614         {AFABS, ynone, Px, opBytes{0xd9, 0xe1}},
1615         {AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}},
1616         {AFBSTP, yclflush, Px, opBytes{0xdf, 06}},
1617         {AFCHS, ynone, Px, opBytes{0xd9, 0xe0}},
1618         {AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}},
1619         {AFCOS, ynone, Px, opBytes{0xd9, 0xff}},
1620         {AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}},
1621         {AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}},
1622         {AFINIT, ynone, Px, opBytes{0xdb, 0xe3}},
1623         {AFLD1, ynone, Px, opBytes{0xd9, 0xe8}},
1624         {AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}},
1625         {AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}},
1626         {AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}},
1627         {AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}},
1628         {AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}},
1629         {AFLDZ, ynone, Px, opBytes{0xd9, 0xee}},
1630         {AFNOP, ynone, Px, opBytes{0xd9, 0xd0}},
1631         {AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}},
1632         {AFPREM, ynone, Px, opBytes{0xd9, 0xf8}},
1633         {AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}},
1634         {AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}},
1635         {AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}},
1636         {AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}},
1637         {AFSIN, ynone, Px, opBytes{0xd9, 0xfe}},
1638         {AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}},
1639         {AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}},
1640         {AFTST, ynone, Px, opBytes{0xd9, 0xe4}},
1641         {AFXAM, ynone, Px, opBytes{0xd9, 0xe5}},
1642         {AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}},
1643         {AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}},
1644         {AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}},
1645         {ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}},
1646         {ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}},
1647         {ACMPXCHGW, yrl_ml, Pe, opBytes{0x0f, 0xb1}},
1648         {ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}},
1649         {ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}},
1650         {ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}},
1651         {AINVD, ynone, Pm, opBytes{0x08}},
1652         {AINVLPG, ydivb, Pm, opBytes{0x01, 07}},
1653         {AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}},
1654         {ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}},
1655         {AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}},
1656         {AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}},
1657         {AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}},
1658         {ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}},
1659         {ARDMSR, ynone, Pm, opBytes{0x32}},
1660         {ARDPMC, ynone, Pm, opBytes{0x33}},
1661         {ARDTSC, ynone, Pm, opBytes{0x31}},
1662         {ARSM, ynone, Pm, opBytes{0xaa}},
1663         {ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}},
1664         {ASYSRET, ynone, Pm, opBytes{0x07}},
1665         {AWBINVD, ynone, Pm, opBytes{0x09}},
1666         {AWRMSR, ynone, Pm, opBytes{0x30}},
1667         {AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}},
1668         {AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}},
1669         {AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}},
1670         {AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}},
1671         {AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}},
1672         {ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}},
1673         {ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
1674         {ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
1675         {ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
1676         {APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}},
1677         {APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}},
1678         {APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}},
1679         {APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}},
1680         {AMOVQL, yrl_ml, Px, opBytes{0x89}},
1681         {obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}},
1682         {AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}},
1683         {AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}},
1684         {AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}},
1685         {AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}},
1686         {AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}},
1687         {AAESKEYGENASSIST, yxshuf, Pq, opBytes{0x3a, 0xdf, 0}},
1688         {AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}},
1689         {AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}},
1690         {AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}},
1691         {AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}},
1692         {APSHUFD, yxshuf, Pq, opBytes{0x70, 0}},
1693         {APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}},
1694         {APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}},
1695         {APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}},
1696         {AMOVDDUP, yxm, Pf2, opBytes{0x12}},
1697         {AMOVSHDUP, yxm, Pf3, opBytes{0x16}},
1698         {AMOVSLDUP, yxm, Pf3, opBytes{0x12}},
1699         {ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}},
1700         {ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}},
1701         {AUD1, ynone, Pm, opBytes{0xb9, 0}},
1702         {AUD2, ynone, Pm, opBytes{0x0b, 0}},
1703         {AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}},
1704         {ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}},
1705         {ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}},
1706         {ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}},
1707         {ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}},
1708         {ALMSW, ydivl, Pm, opBytes{0x01, 06}},
1709         {ALLDT, ydivl, Pm, opBytes{0x00, 02}},
1710         {ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}},
1711         {ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}},
1712         {ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}},
1713         {ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}},
1714         {ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}},
1715         {AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}},
1716         {AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}},
1717         {AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}},
1718         {AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}},
1719         {AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}},
1720         {AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}},
1721         {AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}},
1722         {AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}},
1723         {AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}},
1724         {AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}},
1725         {AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 05}},
1726         {AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}},
1727         {ASGDT, yclflush, Pm, opBytes{0x01, 00}},
1728         {ASIDT, yclflush, Pm, opBytes{0x01, 01}},
1729         {ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}},
1730         {ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}},
1731         {ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}},
1732         {ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}},
1733         {ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}},
1734         {ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}},
1735         {ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}},
1736         {ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}},
1737         {ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}},
1738         {AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}},
1739         {AMOVBEW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
1740         {AMOVBEL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
1741         {AMOVBEQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}},
1742         {ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}},
1743         {ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}},
1744         {ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}},
1745         {ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}},
1746         {ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}},
1747         {ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}},
1748         {ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}},
1749         {ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}},
1750         {ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}},
1751         {ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}},
1752         {APBLENDVB, yblendvpd, Pq4, opBytes{0x10}},
1753         {ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}},
1754         {ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}},
1755         {ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}},
1756         {ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}},
1757         {ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}},
1758         {ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}},
1759         {ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}},
1760         {ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}},
1761         {ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}},
1762         {ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}},
1763         {ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}},
1764         {AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}},
1765         {AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}},
1766         {AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}},
1767         {AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}},
1768         {ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}},
1769         {ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}},
1770         {ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}},
1771         {ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}},
1772         {ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}},
1773         {ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}},
1774         {ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}},
1775         {ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}},
1776         {ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}},
1777         {ARDPID, yrdrand, Pf3, opBytes{0xc7, 07}},
1778
1779         {ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}},
1780         {ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}},
1781         {AXACQUIRE, ynone, Px, opBytes{0xf2}},
1782         {AXRELEASE, ynone, Px, opBytes{0xf3}},
1783         {AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}},
1784         {AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}},
1785         {AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}},
1786         {AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}},
1787         {AXGETBV, ynone, Pm, opBytes{01, 0xd0}},
1788         {obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}},
1789         {obj.APCDATA, ypcdata, Px, opBytes{0, 0}},
1790         {obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}},
1791         {obj.ADUFFZERO, yduff, Px, opBytes{0xe8}},
1792
1793         {obj.AEND, nil, 0, opBytes{}},
1794         {0, nil, 0, opBytes{}},
1795 }
1796
// opindex maps an opcode, masked with obj.AMask, to its instruction table
// entry. It is filled in by instinit from avxOptab and optab; entries for
// opcodes that appear in neither table stay nil.
1797 var opindex [(ALAST + 1) & obj.AMask]*Optab
1798
1799 // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
1800 // This happens on systems like Solaris that call .so functions instead of system calls.
1801 // It does not seem to be necessary for any other systems. This is probably working
1802 // around a Solaris-specific bug that should be fixed differently, but we don't know
1803 // what that bug is. And this does fix it.
1804 func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
1805         if ctxt.Headtype == objabi.Hsolaris {
1806                 // All the Solaris dynamic imports from libc.so begin with "libc_".
1807                 return strings.HasPrefix(s.Name, "libc_")
1808         }
1809         return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
1810 }
1811
// nop holds one single-instruction no-op encoding per length: nop[i] is the
// encoding that is i+1 bytes long, zero-padded to the array size.
// The encodings were constructed by hand and disassembled with gdb to verify.
// See http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
//
// The table stops at 9 bytes. The 10-byte form
//
//	{0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
//
// is left out because Native Client rejects the repeated 0x66 prefix.
var nop = [][16]uint8{
	{0x90},
	{0x66, 0x90},
	{0x0F, 0x1F, 0x00},
	{0x0F, 0x1F, 0x40, 0x00},
	{0x0F, 0x1F, 0x44, 0x00, 0x00},
	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
}

// fillnop overwrites the first n bytes of p with no-op instructions.
// It emits the longest encoding in nop repeatedly and finishes with the
// encoding that exactly matches the remaining length.
// p must be at least n bytes long.
func fillnop(p []byte, n int) {
	for n > 0 {
		size := len(nop)
		if n < size {
			size = n
		}
		seq := nop[size-1]
		copy(p[:size], seq[:size])
		p = p[size:]
		n -= size
	}
}
1842
1843 func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
1844         s.Grow(int64(c) + int64(pad))
1845         fillnop(s.P[c:], int(pad))
1846         return c + pad
1847 }
1848
1849 func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
1850         if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
1851                 return l
1852         }
1853         return q
1854 }
1855
1856 // isJump returns whether p is a jump instruction.
1857 // It is used to ensure that no standalone or macro-fused jump will straddle
1858 // or end on a 32 byte boundary by inserting NOPs before the jumps.
1859 func isJump(p *obj.Prog) bool {
1860         return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL ||
1861                 p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO
1862 }
1863
1864 // lookForJCC returns the first real instruction starting from p, if that instruction is a conditional
1865 // jump. Otherwise, nil is returned.
1866 func lookForJCC(p *obj.Prog) *obj.Prog {
1867         // Skip any PCDATA, FUNCDATA or NOP instructions
1868         var q *obj.Prog
1869         for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link {
1870         }
1871
1872         if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL {
1873                 return nil
1874         }
1875
1876         switch q.As {
1877         case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI,
1878                 AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT:
1879         default:
1880                 return nil
1881         }
1882
1883         return q
1884 }
1885
1886 // fusedJump determines whether p can be fused with a subsequent conditional jump instruction.
1887 // If it can, we return true followed by the total size of the fused jump. If it can't, we return false.
1888 // Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2.
1889 func fusedJump(p *obj.Prog) (bool, uint8) {
1890         var fusedSize uint8
1891
1892         // The first instruction in a macro fused pair may be preceded by the LOCK prefix,
1893         // or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we
1894         // need to be careful to insert any padding before the locks rather than directly after them.
1895
1896         if p.As == AXRELEASE || p.As == AXACQUIRE {
1897                 fusedSize += p.Isize
1898                 for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
1899                 }
1900                 if p == nil {
1901                         return false, 0
1902                 }
1903         }
1904         if p.As == ALOCK {
1905                 fusedSize += p.Isize
1906                 for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
1907                 }
1908                 if p == nil {
1909                         return false, 0
1910                 }
1911         }
1912         cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW
1913
1914         cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ ||
1915                 p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp
1916
1917         testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW ||
1918                 p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW
1919
1920         incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW ||
1921                 p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW
1922
1923         if !cmpAddSub && !testAnd && !incDec {
1924                 return false, 0
1925         }
1926
1927         if !incDec {
1928                 var argOne obj.AddrType
1929                 var argTwo obj.AddrType
1930                 if cmp {
1931                         argOne = p.From.Type
1932                         argTwo = p.To.Type
1933                 } else {
1934                         argOne = p.To.Type
1935                         argTwo = p.From.Type
1936                 }
1937                 if argOne == obj.TYPE_REG {
1938                         if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM {
1939                                 return false, 0
1940                         }
1941                 } else if argOne == obj.TYPE_MEM {
1942                         if argTwo != obj.TYPE_REG {
1943                                 return false, 0
1944                         }
1945                 } else {
1946                         return false, 0
1947                 }
1948         }
1949
1950         fusedSize += p.Isize
1951         jmp := lookForJCC(p)
1952         if jmp == nil {
1953                 return false, 0
1954         }
1955
1956         fusedSize += jmp.Isize
1957
1958         if testAnd {
1959                 return true, fusedSize
1960         }
1961
1962         if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI ||
1963                 jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC {
1964                 return false, 0
1965         }
1966
1967         if cmpAddSub {
1968                 return true, fusedSize
1969         }
1970
1971         if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS {
1972                 return false, 0
1973         }
1974
1975         return true, fusedSize
1976 }
1977
// padJumpsCtx is the alignment boundary, in bytes, that standalone and
// macro-fused jumps must not cross or end on. The zero value disables jump
// padding. makePjcCtx returns either 0 or 32.
1978 type padJumpsCtx int32
1979
1980 func makePjcCtx(ctxt *obj.Link) padJumpsCtx {
1981         // Disable jump padding on 32 bit builds by setting
1982         // padJumps to 0.
1983         if ctxt.Arch.Family == sys.I386 {
1984                 return padJumpsCtx(0)
1985         }
1986
1987         // Disable jump padding for hand written assembly code.
1988         if ctxt.IsAsm {
1989                 return padJumpsCtx(0)
1990         }
1991
1992         return padJumpsCtx(32)
1993 }
1994
1995 // padJump detects whether the instruction being assembled is a standalone or a macro-fused
1996 // jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does
1997 // not cross or end on a 32 byte boundary.
1998 func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 {
1999         if pjc == 0 {
2000                 return c
2001         }
2002
2003         var toPad int32
2004         fj, fjSize := fusedJump(p)
2005         mask := int32(pjc - 1)
2006         if fj {
2007                 if (c&mask)+int32(fjSize) >= int32(pjc) {
2008                         toPad = int32(pjc) - (c & mask)
2009                 }
2010         } else if isJump(p) {
2011                 if (c&mask)+int32(p.Isize) >= int32(pjc) {
2012                         toPad = int32(pjc) - (c & mask)
2013                 }
2014         }
2015         if toPad <= 0 {
2016                 return c
2017         }
2018
2019         return noppad(ctxt, s, c, toPad)
2020 }
2021
2022 // reAssemble is called if an instruction's size changes during assembly. If
2023 // it does and the instruction is a standalone or a macro-fused jump we need to
2024 // reassemble.
2025 func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool {
2026         if pjc == 0 {
2027                 return false
2028         }
2029
2030         fj, _ := fusedJump(p)
2031         return fj || isJump(p)
2032 }
2033
// nopPad records one run of padding bytes inserted in front of an instruction
// during assembly, so that span6 can later splice a matching NOP Prog into
// the instruction list.
2034 type nopPad struct {
2035         p *obj.Prog // Instruction before the pad
2036         n int32     // Size of the pad
2037 }
2038
// span6 assembles the function symbol s: it lays out every instruction in
// s.Func().Text, assigns each one its Pc and Isize, and fills in s.P and
// s.Size. It returns immediately if s.P is already non-nil.
//
// Branches are first encoded in their short form. The whole function is
// assembled again whenever a forward branch does not fit a short displacement
// or an instruction involved in jump padding changes size. If assembly
// reports new errors and another pass would be needed, span6 returns early
// without setting s.Size.
//
// Once the layout has settled, padding inserted in front of instructions is
// spliced into the Prog list as NOPs (not for hand written assembly), unsafe
// points are marked when the two-instruction TLS sequence is in use, and jump
// table entries are written. newprog is passed through to
// obj.MarkUnsafePoints.
2039 func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
2040         if ctxt.Retpoline && ctxt.Arch.Family == sys.I386 {
2041                 ctxt.Diag("-spectre=ret not supported on 386")
2042                 ctxt.Retpoline = false // don't keep printing
2043         }
2044
2045         pjc := makePjcCtx(ctxt)
2046
     // s already has code bytes; nothing to do.
2047         if s.P != nil {
2048                 return
2049         }
2050
2051         if ycover[0] == 0 {
2052                 ctxt.Diag("x86 tables not initialized, call x86.instinit first")
2053         }
2054
             // First pass: normalize instructions before any layout is done.
2055         for p := s.Func().Text; p != nil; p = p.Link {
                     // A branch without a target is made to target itself.
2056                 if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil {
2057                         p.To.SetTarget(p)
2058                 }
2059                 if p.As == AADJSP {
2060                         p.To.Type = obj.TYPE_REG
2061                         p.To.Reg = REG_SP
2062                         // Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive.
2063                         // One exception: It is smaller to encode $-0x80 than $0x80.
2064                         // For that case, flip the sign and the op:
2065                         // Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'.
2066                         switch v := p.From.Offset; {
2067                         case v == 0:
2068                                 p.As = obj.ANOP
2069                         case v == 0x80 || (v < 0 && v != -0x80):
2070                                 p.As = spadjop(ctxt, AADDL, AADDQ)
2071                                 p.From.Offset *= -1
2072                         default:
2073                                 p.As = spadjop(ctxt, ASUBL, ASUBQ)
2074                         }
2075                 }
                     // With retpoline enabled, an indirect CALL/JMP through a register
                     // is rewritten into a direct branch to the symbol
                     // "runtime.retpoline" + register name. A memory operand cannot be
                     // rewritten this way and is reported as an error.
2076                 if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) {
2077                         if p.To.Type != obj.TYPE_REG {
2078                                 ctxt.Diag("non-retpoline-compatible: %v", p)
2079                                 continue
2080                         }
2081                         p.To.Type = obj.TYPE_BRANCH
2082                         p.To.Name = obj.NAME_EXTERN
2083                         p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg)))
2084                         p.To.Reg = 0
2085                         p.To.Offset = 0
2086                 }
2087         }
2088
             // Second pass: count instructions, mark every instruction as using
             // short branches, and flag backward branches and their targets.
2089         var count int64 // rough count of number of instructions
2090         for p := s.Func().Text; p != nil; p = p.Link {
2091                 count++
2092                 p.Back = branchShort // use short branches first time through
                     // The target q carries branchShort here only if this loop has
                     // already visited it, i.e. q is p itself or comes before p.
                     // NOTE(review): relies on Back not having branchShort set on
                     // entry; confirm.
2093                 if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) {
2094                         p.Back |= branchBackwards
2095                         q.Back |= branchLoopHead
2096                 }
2097         }
2098         s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction
2099
2100         var ab AsmBuf
2101         var n int
2102         var c int32
2103         errors := ctxt.Errors
2104         var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies)
2105         nrelocs0 := len(s.R)
2106         for {
2107                 // This loop continues while there are reasons to re-assemble
2108                 // whole block, like the presence of long forward jumps.
2109                 reAssemble := false
                     // Zero the relocations appended by a previous iteration before
                     // truncating, so that no stale entries remain in the backing array.
2110                 for i := range s.R[nrelocs0:] {
2111                         s.R[nrelocs0+i] = obj.Reloc{}
2112                 }
2113                 s.R = s.R[:nrelocs0] // preserve marker relocations generated by the compiler
2114                 s.P = s.P[:0]
2115                 c = 0
2116                 var pPrev *obj.Prog
2117                 nops = nops[:0]
2118                 for p := s.Func().Text; p != nil; p = p.Link {
                             // c0 is the offset before any padding is added for p, so that
                             // c - c0 below is the total padding placed in front of p.
2119                         c0 := c
2120                         c = pjc.padJump(ctxt, s, p, c)
2121
2122                         if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
2123                                 // pad with NOPs
                                     // v is the distance from c up to the next multiple of
                                     // loopAlign (assumes loopAlign is a power of two; confirm).
2124                                 v := -c & (loopAlign - 1)
2125
2126                                 if v <= maxLoopPad {
2127                                         s.Grow(int64(c) + int64(v))
2128                                         fillnop(s.P[c:], int(v))
2129                                         c += v
2130                                 }
2131                         }
2132
2133                         p.Pc = int64(c)
2134
2135                         // process forward jumps to p
2136                         for q := p.Rel; q != nil; q = q.Forwd {
                                     // v is the displacement from the end of q to p.
2137                                 v := int32(p.Pc - (q.Pc + int64(q.Isize)))
2138                                 if q.Back&branchShort != 0 {
                                             // The short form no longer reaches: switch q to
                                             // the long form and schedule another pass.
2139                                         if v > 127 {
2140                                                 reAssemble = true
2141                                                 q.Back ^= branchShort
2142                                         }
2143
                                             // Patch the one-byte displacement, which sits at
                                             // offset 2 for JCXZL and XBEGIN and at offset 1
                                             // for every other short branch.
2144                                         if q.As == AJCXZL || q.As == AXBEGIN {
2145                                                 s.P[q.Pc+2] = byte(v)
2146                                         } else {
2147                                                 s.P[q.Pc+1] = byte(v)
2148                                         }
2149                                 } else {
                                             // Long form: the 32-bit displacement occupies
                                             // the last four bytes of the instruction.
2150                                         binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
2151                                 }
2152                         }
2153
2154                         p.Rel = nil
2155
                             // c has not changed since p.Pc was set above, so this
                             // assignment stores the same value again.
2156                         p.Pc = int64(c)
2157                         ab.asmins(ctxt, s, p)
2158                         m := ab.Len()
2159                         if int(p.Isize) != m {
2160                                 p.Isize = uint8(m)
2161                                 if pjc.reAssemble(p) {
2162                                         // We need to re-assemble here to check for jumps and fused jumps
2163                                         // that span or end on 32 byte boundaries.
2164                                         reAssemble = true
2165                                 }
2166                         }
2167
2168                         s.Grow(p.Pc + int64(m))
2169                         copy(s.P[p.Pc:], ab.Bytes())
2170                         // If there was padding, remember it.
2171                         if pPrev != nil && !ctxt.IsAsm && c > c0 {
2172                                 nops = append(nops, nopPad{p: pPrev, n: c - c0})
2173                         }
2174                         c += int32(m)
2175                         pPrev = p
2176                 }
2177
                     // Guard against a layout that never converges.
2178                 n++
2179                 if n > 1000 {
2180                         ctxt.Diag("span must be looping")
2181                         log.Fatalf("loop")
2182                 }
2183                 if !reAssemble {
2184                         break
2185                 }
                     // Do not retry once assembly has reported new errors.
2186                 if ctxt.Errors > errors {
2187                         return
2188                 }
2189         }
2190         // splice padding nops into Progs
             // Each NOP is linked directly after the instruction that precedes the
             // pad, starts where that instruction ends, and is n.n bytes long.
2191         for _, n := range nops {
2192                 pp := n.p
2193                 np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)}
2194                 pp.Link = np
2195         }
2196
2197         s.Size = int64(c)
2198
             // Disabled debugging dump of the assembled bytes and relocations.
2199         if false { /* debug['a'] > 1 */
2200                 fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
2201                 var i int
2202                 for i = 0; i < len(s.P); i++ {
2203                         fmt.Printf(" %.2x", s.P[i])
2204                         if i%16 == 15 {
2205                                 fmt.Printf("\n  %.6x", uint(i+1))
2206                         }
2207                 }
2208
2209                 if i%16 != 0 {
2210                         fmt.Printf("\n")
2211                 }
2212
2213                 for i := 0; i < len(s.R); i++ {
2214                         r := &s.R[i]
2215                         fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
2216                 }
2217         }
2218
2219         // Mark nonpreemptible instruction sequences.
2220         // The 2-instruction TLS access sequence
2221         //      MOVQ TLS, BX
2222         //      MOVQ 0(BX)(TLS*1), BX
2223         // is not async preemptible, as if it is preempted and resumed on
2224         // a different thread, the TLS address may become invalid.
2225         if !CanUse1InsnTLS(ctxt) {
2226                 useTLS := func(p *obj.Prog) bool {
2227                         // Only need to mark the second instruction, which has
2228                         // REG_TLS as Index. (It is okay to interrupt and restart
2229                         // the first instruction.)
2230                         return p.From.Index == REG_TLS
2231                 }
2232                 obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil)
2233         }
2234
2235         // Now that we know byte offsets, we can generate jump table entries.
2236         // TODO: could this live in obj instead of obj/$ARCH?
2237         for _, jt := range s.Func().JumpTables {
2238                 for i, p := range jt.Targets {
2239                         // The ith jumptable entry points to the p.Pc'th
2240                         // byte in the function symbol s.
                             // Each entry is an 8-byte address stored at offset i*8.
2241                         jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, s, p.Pc)
2242                 }
2243         }
2244 }
2245
2246 func instinit(ctxt *obj.Link) {
2247         if ycover[0] != 0 {
2248                 // Already initialized; stop now.
2249                 // This happens in the cmd/asm tests,
2250                 // each of which re-initializes the arch.
2251                 return
2252         }
2253
2254         switch ctxt.Headtype {
2255         case objabi.Hplan9:
2256                 plan9privates = ctxt.Lookup("_privates")
2257         }
2258
2259         for i := range avxOptab {
2260                 c := avxOptab[i].as
2261                 if opindex[c&obj.AMask] != nil {
2262                         ctxt.Diag("phase error in avxOptab: %d (%v)", i, c)
2263                 }
2264                 opindex[c&obj.AMask] = &avxOptab[i]
2265         }
2266         for i := 1; optab[i].as != 0; i++ {
2267                 c := optab[i].as
2268                 if opindex[c&obj.AMask] != nil {
2269                         ctxt.Diag("phase error in optab: %d (%v)", i, c)
2270                 }
2271                 opindex[c&obj.AMask] = &optab[i]
2272         }
2273
2274         for i := 0; i < Ymax; i++ {
2275                 ycover[i*Ymax+i] = 1
2276         }
2277
2278         ycover[Yi0*Ymax+Yu2] = 1
2279         ycover[Yi1*Ymax+Yu2] = 1
2280
2281         ycover[Yi0*Ymax+Yi8] = 1
2282         ycover[Yi1*Ymax+Yi8] = 1
2283         ycover[Yu2*Ymax+Yi8] = 1
2284         ycover[Yu7*Ymax+Yi8] = 1
2285
2286         ycover[Yi0*Ymax+Yu7] = 1
2287         ycover[Yi1*Ymax+Yu7] = 1
2288         ycover[Yu2*Ymax+Yu7] = 1
2289
2290         ycover[Yi0*Ymax+Yu8] = 1
2291         ycover[Yi1*Ymax+Yu8] = 1
2292         ycover[Yu2*Ymax+Yu8] = 1
2293         ycover[Yu7*Ymax+Yu8] = 1
2294
2295         ycover[Yi0*Ymax+Ys32] = 1
2296         ycover[Yi1*Ymax+Ys32] = 1
2297         ycover[Yu2*Ymax+Ys32] = 1
2298         ycover[Yu7*Ymax+Ys32] = 1
2299         ycover[Yu8*Ymax+Ys32] = 1
2300         ycover[Yi8*Ymax+Ys32] = 1
2301
2302         ycover[Yi0*Ymax+Yi32] = 1
2303         ycover[Yi1*Ymax+Yi32] = 1
2304         ycover[Yu2*Ymax+Yi32] = 1
2305         ycover[Yu7*Ymax+Yi32] = 1
2306         ycover[Yu8*Ymax+Yi32] = 1
2307         ycover[Yi8*Ymax+Yi32] = 1
2308         ycover[Ys32*Ymax+Yi32] = 1
2309
2310         ycover[Yi0*Ymax+Yi64] = 1
2311         ycover[Yi1*Ymax+Yi64] = 1
2312         ycover[Yu7*Ymax+Yi64] = 1
2313         ycover[Yu2*Ymax+Yi64] = 1
2314         ycover[Yu8*Ymax+Yi64] = 1
2315         ycover[Yi8*Ymax+Yi64] = 1
2316         ycover[Ys32*Ymax+Yi64] = 1
2317         ycover[Yi32*Ymax+Yi64] = 1
2318
2319         ycover[Yal*Ymax+Yrb] = 1
2320         ycover[Ycl*Ymax+Yrb] = 1
2321         ycover[Yax*Ymax+Yrb] = 1
2322         ycover[Ycx*Ymax+Yrb] = 1
2323         ycover[Yrx*Ymax+Yrb] = 1
2324         ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
2325
2326         ycover[Ycl*Ymax+Ycx] = 1
2327
2328         ycover[Yax*Ymax+Yrx] = 1
2329         ycover[Ycx*Ymax+Yrx] = 1
2330
2331         ycover[Yax*Ymax+Yrl] = 1
2332         ycover[Ycx*Ymax+Yrl] = 1
2333         ycover[Yrx*Ymax+Yrl] = 1
2334         ycover[Yrl32*Ymax+Yrl] = 1
2335
2336         ycover[Yf0*Ymax+Yrf] = 1
2337
2338         ycover[Yal*Ymax+Ymb] = 1
2339         ycover[Ycl*Ymax+Ymb] = 1
2340         ycover[Yax*Ymax+Ymb] = 1
2341         ycover[Ycx*Ymax+Ymb] = 1
2342         ycover[Yrx*Ymax+Ymb] = 1
2343         ycover[Yrb*Ymax+Ymb] = 1
2344         ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
2345         ycover[Ym*Ymax+Ymb] = 1
2346
2347         ycover[Yax*Ymax+Yml] = 1
2348         ycover[Ycx*Ymax+Yml] = 1
2349         ycover[Yrx*Ymax+Yml] = 1
2350         ycover[Yrl*Ymax+Yml] = 1
2351         ycover[Yrl32*Ymax+Yml] = 1
2352         ycover[Ym*Ymax+Yml] = 1
2353
2354         ycover[Yax*Ymax+Ymm] = 1
2355         ycover[Ycx*Ymax+Ymm] = 1
2356         ycover[Yrx*Ymax+Ymm] = 1
2357         ycover[Yrl*Ymax+Ymm] = 1
2358         ycover[Yrl32*Ymax+Ymm] = 1
2359         ycover[Ym*Ymax+Ymm] = 1
2360         ycover[Ymr*Ymax+Ymm] = 1
2361
2362         ycover[Yxr0*Ymax+Yxr] = 1
2363
2364         ycover[Ym*Ymax+Yxm] = 1
2365         ycover[Yxr0*Ymax+Yxm] = 1
2366         ycover[Yxr*Ymax+Yxm] = 1
2367
2368         ycover[Ym*Ymax+Yym] = 1
2369         ycover[Yyr*Ymax+Yym] = 1
2370
2371         ycover[Yxr0*Ymax+YxrEvex] = 1
2372         ycover[Yxr*Ymax+YxrEvex] = 1
2373
2374         ycover[Ym*Ymax+YxmEvex] = 1
2375         ycover[Yxr0*Ymax+YxmEvex] = 1
2376         ycover[Yxr*Ymax+YxmEvex] = 1
2377         ycover[YxrEvex*Ymax+YxmEvex] = 1
2378
2379         ycover[Yyr*Ymax+YyrEvex] = 1
2380
2381         ycover[Ym*Ymax+YymEvex] = 1
2382         ycover[Yyr*Ymax+YymEvex] = 1
2383         ycover[YyrEvex*Ymax+YymEvex] = 1
2384
2385         ycover[Ym*Ymax+Yzm] = 1
2386         ycover[Yzr*Ymax+Yzm] = 1
2387
2388         ycover[Yk0*Ymax+Yk] = 1
2389         ycover[Yknot0*Ymax+Yk] = 1
2390
2391         ycover[Yk0*Ymax+Ykm] = 1
2392         ycover[Yknot0*Ymax+Ykm] = 1
2393         ycover[Yk*Ymax+Ykm] = 1
2394         ycover[Ym*Ymax+Ykm] = 1
2395
2396         ycover[Yxvm*Ymax+YxvmEvex] = 1
2397
2398         ycover[Yyvm*Ymax+YyvmEvex] = 1
2399
2400         for i := 0; i < MAXREG; i++ {
2401                 reg[i] = -1
2402                 if i >= REG_AL && i <= REG_R15B {
2403                         reg[i] = (i - REG_AL) & 7
2404                         if i >= REG_SPB && i <= REG_DIB {
2405                                 regrex[i] = 0x40
2406                         }
2407                         if i >= REG_R8B && i <= REG_R15B {
2408                                 regrex[i] = Rxr | Rxx | Rxb
2409                         }
2410                 }
2411
2412                 if i >= REG_AH && i <= REG_BH {
2413                         reg[i] = 4 + ((i - REG_AH) & 7)
2414                 }
2415                 if i >= REG_AX && i <= REG_R15 {
2416                         reg[i] = (i - REG_AX) & 7
2417                         if i >= REG_R8 {
2418                                 regrex[i] = Rxr | Rxx | Rxb
2419                         }
2420                 }
2421
2422                 if i >= REG_F0 && i <= REG_F0+7 {
2423                         reg[i] = (i - REG_F0) & 7
2424                 }
2425                 if i >= REG_M0 && i <= REG_M0+7 {
2426                         reg[i] = (i - REG_M0) & 7
2427                 }
2428                 if i >= REG_K0 && i <= REG_K0+7 {
2429                         reg[i] = (i - REG_K0) & 7
2430                 }
2431                 if i >= REG_X0 && i <= REG_X0+15 {
2432                         reg[i] = (i - REG_X0) & 7
2433                         if i >= REG_X0+8 {
2434                                 regrex[i] = Rxr | Rxx | Rxb
2435                         }
2436                 }
2437                 if i >= REG_X16 && i <= REG_X16+15 {
2438                         reg[i] = (i - REG_X16) & 7
2439                         if i >= REG_X16+8 {
2440                                 regrex[i] = Rxr | Rxx | Rxb | RxrEvex
2441                         } else {
2442                                 regrex[i] = RxrEvex
2443                         }
2444                 }
2445                 if i >= REG_Y0 && i <= REG_Y0+15 {
2446                         reg[i] = (i - REG_Y0) & 7
2447                         if i >= REG_Y0+8 {
2448                                 regrex[i] = Rxr | Rxx | Rxb
2449                         }
2450                 }
2451                 if i >= REG_Y16 && i <= REG_Y16+15 {
2452                         reg[i] = (i - REG_Y16) & 7
2453                         if i >= REG_Y16+8 {
2454                                 regrex[i] = Rxr | Rxx | Rxb | RxrEvex
2455                         } else {
2456                                 regrex[i] = RxrEvex
2457                         }
2458                 }
2459                 if i >= REG_Z0 && i <= REG_Z0+15 {
2460                         reg[i] = (i - REG_Z0) & 7
2461                         if i > REG_Z0+7 {
2462                                 regrex[i] = Rxr | Rxx | Rxb
2463                         }
2464                 }
2465                 if i >= REG_Z16 && i <= REG_Z16+15 {
2466                         reg[i] = (i - REG_Z16) & 7
2467                         if i >= REG_Z16+8 {
2468                                 regrex[i] = Rxr | Rxx | Rxb | RxrEvex
2469                         } else {
2470                                 regrex[i] = RxrEvex
2471                         }
2472                 }
2473
2474                 if i >= REG_CR+8 && i <= REG_CR+15 {
2475                         regrex[i] = Rxr
2476                 }
2477         }
2478 }
2479
// isAndroid records whether buildcfg.GOOS is "android". prefixof consults it
// when picking the segment-override prefix for TLS-based memory operands.
var isAndroid = buildcfg.GOOS == "android"
2481
2482 func prefixof(ctxt *obj.Link, a *obj.Addr) int {
2483         if a.Reg < REG_CS && a.Index < REG_CS { // fast path
2484                 return 0
2485         }
2486         if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
2487                 switch a.Reg {
2488                 case REG_CS:
2489                         return 0x2e
2490
2491                 case REG_DS:
2492                         return 0x3e
2493
2494                 case REG_ES:
2495                         return 0x26
2496
2497                 case REG_FS:
2498                         return 0x64
2499
2500                 case REG_GS:
2501                         return 0x65
2502
2503                 case REG_TLS:
2504                         // NOTE: Systems listed here should be only systems that
2505                         // support direct TLS references like 8(TLS) implemented as
2506                         // direct references from FS or GS. Systems that require
2507                         // the initial-exec model, where you load the TLS base into
2508                         // a register and then index from that register, do not reach
2509                         // this code and should not be listed.
2510                         if ctxt.Arch.Family == sys.I386 {
2511                                 switch ctxt.Headtype {
2512                                 default:
2513                                         if isAndroid {
2514                                                 return 0x65 // GS
2515                                         }
2516                                         log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
2517
2518                                 case objabi.Hdarwin,
2519                                         objabi.Hdragonfly,
2520                                         objabi.Hfreebsd,
2521                                         objabi.Hnetbsd,
2522                                         objabi.Hopenbsd:
2523                                         return 0x65 // GS
2524                                 }
2525                         }
2526
2527                         switch ctxt.Headtype {
2528                         default:
2529                                 log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
2530
2531                         case objabi.Hlinux:
2532                                 if isAndroid {
2533                                         return 0x64 // FS
2534                                 }
2535
2536                                 if ctxt.Flag_shared {
2537                                         log.Fatalf("unknown TLS base register for linux with -shared")
2538                                 } else {
2539                                         return 0x64 // FS
2540                                 }
2541
2542                         case objabi.Hdragonfly,
2543                                 objabi.Hfreebsd,
2544                                 objabi.Hnetbsd,
2545                                 objabi.Hopenbsd,
2546                                 objabi.Hsolaris:
2547                                 return 0x64 // FS
2548
2549                         case objabi.Hdarwin:
2550                                 return 0x65 // GS
2551                         }
2552                 }
2553         }
2554
2555         switch a.Index {
2556         case REG_CS:
2557                 return 0x2e
2558
2559         case REG_DS:
2560                 return 0x3e
2561
2562         case REG_ES:
2563                 return 0x26
2564
2565         case REG_TLS:
2566                 if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
2567                         // When building for inclusion into a shared library, an instruction of the form
2568                         //     MOV off(CX)(TLS*1), AX
2569                         // becomes
2570                         //     mov %gs:off(%ecx), %eax // on i386
2571                         //     mov %fs:off(%rcx), %rax // on amd64
2572                         // which assumes that the correct TLS offset has been loaded into CX (today
2573                         // there is only one TLS variable -- g -- so this is OK). When not building for
2574                         // a shared library the instruction it becomes
2575                         //     mov 0x0(%ecx), %eax // on i386
2576                         //     mov 0x0(%rcx), %rax // on amd64
2577                         // and a R_TLS_LE relocation, and so does not require a prefix.
2578                         if ctxt.Arch.Family == sys.I386 {
2579                                 return 0x65 // GS
2580                         }
2581                         return 0x64 // FS
2582                 }
2583
2584         case REG_FS:
2585                 return 0x64
2586
2587         case REG_GS:
2588                 return 0x65
2589         }
2590
2591         return 0
2592 }
2593
2594 // oclassRegList returns multisource operand class for addr.
2595 func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int {
2596         // TODO(quasilyte): when oclass register case is refactored into
2597         // lookup table, use it here to get register kind more easily.
2598         // Helper functions like regIsXmm should go away too (they will become redundant).
2599
2600         regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 }
2601         regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 }
2602         regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 }
2603
2604         reg0, reg1 := decodeRegisterRange(addr.Offset)
2605         low := regIndex(int16(reg0))
2606         high := regIndex(int16(reg1))
2607
2608         if ctxt.Arch.Family == sys.I386 {
2609                 if low >= 8 || high >= 8 {
2610                         return Yxxx
2611                 }
2612         }
2613
2614         switch high - low {
2615         case 3:
2616                 switch {
2617                 case regIsXmm(reg0) && regIsXmm(reg1):
2618                         return YxrEvexMulti4
2619                 case regIsYmm(reg0) && regIsYmm(reg1):
2620                         return YyrEvexMulti4
2621                 case regIsZmm(reg0) && regIsZmm(reg1):
2622                         return YzrMulti4
2623                 default:
2624                         return Yxxx
2625                 }
2626         default:
2627                 return Yxxx
2628         }
2629 }
2630
2631 // oclassVMem returns V-mem (vector memory with VSIB) operand class.
2632 // For addr that is not V-mem returns (Yxxx, false).
2633 func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) {
2634         switch addr.Index {
2635         case REG_X0 + 0,
2636                 REG_X0 + 1,
2637                 REG_X0 + 2,
2638                 REG_X0 + 3,
2639                 REG_X0 + 4,
2640                 REG_X0 + 5,
2641                 REG_X0 + 6,
2642                 REG_X0 + 7:
2643                 return Yxvm, true
2644         case REG_X8 + 0,
2645                 REG_X8 + 1,
2646                 REG_X8 + 2,
2647                 REG_X8 + 3,
2648                 REG_X8 + 4,
2649                 REG_X8 + 5,
2650                 REG_X8 + 6,
2651                 REG_X8 + 7:
2652                 if ctxt.Arch.Family == sys.I386 {
2653                         return Yxxx, true
2654                 }
2655                 return Yxvm, true
2656         case REG_X16 + 0,
2657                 REG_X16 + 1,
2658                 REG_X16 + 2,
2659                 REG_X16 + 3,
2660                 REG_X16 + 4,
2661                 REG_X16 + 5,
2662                 REG_X16 + 6,
2663                 REG_X16 + 7,
2664                 REG_X16 + 8,
2665                 REG_X16 + 9,
2666                 REG_X16 + 10,
2667                 REG_X16 + 11,
2668                 REG_X16 + 12,
2669                 REG_X16 + 13,
2670                 REG_X16 + 14,
2671                 REG_X16 + 15:
2672                 if ctxt.Arch.Family == sys.I386 {
2673                         return Yxxx, true
2674                 }
2675                 return YxvmEvex, true
2676
2677         case REG_Y0 + 0,
2678                 REG_Y0 + 1,
2679                 REG_Y0 + 2,
2680                 REG_Y0 + 3,
2681                 REG_Y0 + 4,
2682                 REG_Y0 + 5,
2683                 REG_Y0 + 6,
2684                 REG_Y0 + 7:
2685                 return Yyvm, true
2686         case REG_Y8 + 0,
2687                 REG_Y8 + 1,
2688                 REG_Y8 + 2,
2689                 REG_Y8 + 3,
2690                 REG_Y8 + 4,
2691                 REG_Y8 + 5,
2692                 REG_Y8 + 6,
2693                 REG_Y8 + 7:
2694                 if ctxt.Arch.Family == sys.I386 {
2695                         return Yxxx, true
2696                 }
2697                 return Yyvm, true
2698         case REG_Y16 + 0,
2699                 REG_Y16 + 1,
2700                 REG_Y16 + 2,
2701                 REG_Y16 + 3,
2702                 REG_Y16 + 4,
2703                 REG_Y16 + 5,
2704                 REG_Y16 + 6,
2705                 REG_Y16 + 7,
2706                 REG_Y16 + 8,
2707                 REG_Y16 + 9,
2708                 REG_Y16 + 10,
2709                 REG_Y16 + 11,
2710                 REG_Y16 + 12,
2711                 REG_Y16 + 13,
2712                 REG_Y16 + 14,
2713                 REG_Y16 + 15:
2714                 if ctxt.Arch.Family == sys.I386 {
2715                         return Yxxx, true
2716                 }
2717                 return YyvmEvex, true
2718
2719         case REG_Z0 + 0,
2720                 REG_Z0 + 1,
2721                 REG_Z0 + 2,
2722                 REG_Z0 + 3,
2723                 REG_Z0 + 4,
2724                 REG_Z0 + 5,
2725                 REG_Z0 + 6,
2726                 REG_Z0 + 7:
2727                 return Yzvm, true
2728         case REG_Z8 + 0,
2729                 REG_Z8 + 1,
2730                 REG_Z8 + 2,
2731                 REG_Z8 + 3,
2732                 REG_Z8 + 4,
2733                 REG_Z8 + 5,
2734                 REG_Z8 + 6,
2735                 REG_Z8 + 7,
2736                 REG_Z8 + 8,
2737                 REG_Z8 + 9,
2738                 REG_Z8 + 10,
2739                 REG_Z8 + 11,
2740                 REG_Z8 + 12,
2741                 REG_Z8 + 13,
2742                 REG_Z8 + 14,
2743                 REG_Z8 + 15,
2744                 REG_Z8 + 16,
2745                 REG_Z8 + 17,
2746                 REG_Z8 + 18,
2747                 REG_Z8 + 19,
2748                 REG_Z8 + 20,
2749                 REG_Z8 + 21,
2750                 REG_Z8 + 22,
2751                 REG_Z8 + 23:
2752                 if ctxt.Arch.Family == sys.I386 {
2753                         return Yxxx, true
2754                 }
2755                 return Yzvm, true
2756         }
2757
2758         return Yxxx, false
2759 }
2760
2761 func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
2762         switch a.Type {
2763         case obj.TYPE_REGLIST:
2764                 return oclassRegList(ctxt, a)
2765
2766         case obj.TYPE_NONE:
2767                 return Ynone
2768
2769         case obj.TYPE_BRANCH:
2770                 return Ybr
2771
2772         case obj.TYPE_INDIR:
2773                 if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
2774                         return Yindir
2775                 }
2776                 return Yxxx
2777
2778         case obj.TYPE_MEM:
2779                 // Pseudo registers have negative index, but SP is
2780                 // not pseudo on x86, hence REG_SP check is not redundant.
2781                 if a.Index == REG_SP || a.Index < 0 {
2782                         // Can't use FP/SB/PC/SP as the index register.
2783                         return Yxxx
2784                 }
2785
2786                 if vmem, ok := oclassVMem(ctxt, a); ok {
2787                         return vmem
2788                 }
2789
2790                 if ctxt.Arch.Family == sys.AMD64 {
2791                         switch a.Name {
2792                         case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
2793                                 // Global variables can't use index registers and their
2794                                 // base register is %rip (%rip is encoded as REG_NONE).
2795                                 if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
2796                                         return Yxxx
2797                                 }
2798                         case obj.NAME_AUTO, obj.NAME_PARAM:
2799                                 // These names must have a base of SP.  The old compiler
2800                                 // uses 0 for the base register. SSA uses REG_SP.
2801                                 if a.Reg != REG_SP && a.Reg != 0 {
2802                                         return Yxxx
2803                                 }
2804                         case obj.NAME_NONE:
2805                                 // everything is ok
2806                         default:
2807                                 // unknown name
2808                                 return Yxxx
2809                         }
2810                 }
2811                 return Ym
2812
2813         case obj.TYPE_ADDR:
2814                 switch a.Name {
2815                 case obj.NAME_GOTREF:
2816                         ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
2817                         return Yxxx
2818
2819                 case obj.NAME_EXTERN,
2820                         obj.NAME_STATIC:
2821                         if a.Sym != nil && useAbs(ctxt, a.Sym) {
2822                                 return Yi32
2823                         }
2824                         return Yiauto // use pc-relative addressing
2825
2826                 case obj.NAME_AUTO,
2827                         obj.NAME_PARAM:
2828                         return Yiauto
2829                 }
2830
2831                 // TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
2832                 // and got Yi32 in an earlier version of this code.
2833                 // Keep doing that until we fix yduff etc.
2834                 if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
2835                         return Yi32
2836                 }
2837
2838                 if a.Sym != nil || a.Name != obj.NAME_NONE {
2839                         ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
2840                 }
2841                 fallthrough
2842
2843         case obj.TYPE_CONST:
2844                 if a.Sym != nil {
2845                         ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
2846                 }
2847
2848                 v := a.Offset
2849                 if ctxt.Arch.Family == sys.I386 {
2850                         v = int64(int32(v))
2851                 }
2852                 switch {
2853                 case v == 0:
2854                         return Yi0
2855                 case v == 1:
2856                         return Yi1
2857                 case v >= 0 && v <= 3:
2858                         return Yu2
2859                 case v >= 0 && v <= 127:
2860                         return Yu7
2861                 case v >= 0 && v <= 255:
2862                         return Yu8
2863                 case v >= -128 && v <= 127:
2864                         return Yi8
2865                 }
2866                 if ctxt.Arch.Family == sys.I386 {
2867                         return Yi32
2868                 }
2869                 l := int32(v)
2870                 if int64(l) == v {
2871                         return Ys32 // can sign extend
2872                 }
2873                 if v>>32 == 0 {
2874                         return Yi32 // unsigned
2875                 }
2876                 return Yi64
2877
2878         case obj.TYPE_TEXTSIZE:
2879                 return Ytextsize
2880         }
2881
2882         if a.Type != obj.TYPE_REG {
2883                 ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
2884                 return Yxxx
2885         }
2886
2887         switch a.Reg {
2888         case REG_AL:
2889                 return Yal
2890
2891         case REG_AX:
2892                 return Yax
2893
2894                 /*
2895                         case REG_SPB:
2896                 */
2897         case REG_BPB,
2898                 REG_SIB,
2899                 REG_DIB,
2900                 REG_R8B,
2901                 REG_R9B,
2902                 REG_R10B,
2903                 REG_R11B,
2904                 REG_R12B,
2905                 REG_R13B,
2906                 REG_R14B,
2907                 REG_R15B:
2908                 if ctxt.Arch.Family == sys.I386 {
2909                         return Yxxx
2910                 }
2911                 fallthrough
2912
2913         case REG_DL,
2914                 REG_BL,
2915                 REG_AH,
2916                 REG_CH,
2917                 REG_DH,
2918                 REG_BH:
2919                 return Yrb
2920
2921         case REG_CL:
2922                 return Ycl
2923
2924         case REG_CX:
2925                 return Ycx
2926
2927         case REG_DX, REG_BX:
2928                 return Yrx
2929
2930         case REG_R8, // not really Yrl
2931                 REG_R9,
2932                 REG_R10,
2933                 REG_R11,
2934                 REG_R12,
2935                 REG_R13,
2936                 REG_R14,
2937                 REG_R15:
2938                 if ctxt.Arch.Family == sys.I386 {
2939                         return Yxxx
2940                 }
2941                 fallthrough
2942
2943         case REG_SP, REG_BP, REG_SI, REG_DI:
2944                 if ctxt.Arch.Family == sys.I386 {
2945                         return Yrl32
2946                 }
2947                 return Yrl
2948
2949         case REG_F0 + 0:
2950                 return Yf0
2951
2952         case REG_F0 + 1,
2953                 REG_F0 + 2,
2954                 REG_F0 + 3,
2955                 REG_F0 + 4,
2956                 REG_F0 + 5,
2957                 REG_F0 + 6,
2958                 REG_F0 + 7:
2959                 return Yrf
2960
2961         case REG_M0 + 0,
2962                 REG_M0 + 1,
2963                 REG_M0 + 2,
2964                 REG_M0 + 3,
2965                 REG_M0 + 4,
2966                 REG_M0 + 5,
2967                 REG_M0 + 6,
2968                 REG_M0 + 7:
2969                 return Ymr
2970
2971         case REG_X0:
2972                 return Yxr0
2973
2974         case REG_X0 + 1,
2975                 REG_X0 + 2,
2976                 REG_X0 + 3,
2977                 REG_X0 + 4,
2978                 REG_X0 + 5,
2979                 REG_X0 + 6,
2980                 REG_X0 + 7,
2981                 REG_X0 + 8,
2982                 REG_X0 + 9,
2983                 REG_X0 + 10,
2984                 REG_X0 + 11,
2985                 REG_X0 + 12,
2986                 REG_X0 + 13,
2987                 REG_X0 + 14,
2988                 REG_X0 + 15:
2989                 return Yxr
2990
2991         case REG_X0 + 16,
2992                 REG_X0 + 17,
2993                 REG_X0 + 18,
2994                 REG_X0 + 19,
2995                 REG_X0 + 20,
2996                 REG_X0 + 21,
2997                 REG_X0 + 22,
2998                 REG_X0 + 23,
2999                 REG_X0 + 24,
3000                 REG_X0 + 25,
3001                 REG_X0 + 26,
3002                 REG_X0 + 27,
3003                 REG_X0 + 28,
3004                 REG_X0 + 29,
3005                 REG_X0 + 30,
3006                 REG_X0 + 31:
3007                 return YxrEvex
3008
3009         case REG_Y0 + 0,
3010                 REG_Y0 + 1,
3011                 REG_Y0 + 2,
3012                 REG_Y0 + 3,
3013                 REG_Y0 + 4,
3014                 REG_Y0 + 5,
3015                 REG_Y0 + 6,
3016                 REG_Y0 + 7,
3017                 REG_Y0 + 8,
3018                 REG_Y0 + 9,
3019                 REG_Y0 + 10,
3020                 REG_Y0 + 11,
3021                 REG_Y0 + 12,
3022                 REG_Y0 + 13,
3023                 REG_Y0 + 14,
3024                 REG_Y0 + 15:
3025                 return Yyr
3026
3027         case REG_Y0 + 16,
3028                 REG_Y0 + 17,
3029                 REG_Y0 + 18,
3030                 REG_Y0 + 19,
3031                 REG_Y0 + 20,
3032                 REG_Y0 + 21,
3033                 REG_Y0 + 22,
3034                 REG_Y0 + 23,
3035                 REG_Y0 + 24,
3036                 REG_Y0 + 25,
3037                 REG_Y0 + 26,
3038                 REG_Y0 + 27,
3039                 REG_Y0 + 28,
3040                 REG_Y0 + 29,
3041                 REG_Y0 + 30,
3042                 REG_Y0 + 31:
3043                 return YyrEvex
3044
3045         case REG_Z0 + 0,
3046                 REG_Z0 + 1,
3047                 REG_Z0 + 2,
3048                 REG_Z0 + 3,
3049                 REG_Z0 + 4,
3050                 REG_Z0 + 5,
3051                 REG_Z0 + 6,
3052                 REG_Z0 + 7:
3053                 return Yzr
3054
3055         case REG_Z0 + 8,
3056                 REG_Z0 + 9,
3057                 REG_Z0 + 10,
3058                 REG_Z0 + 11,
3059                 REG_Z0 + 12,
3060                 REG_Z0 + 13,
3061                 REG_Z0 + 14,
3062                 REG_Z0 + 15,
3063                 REG_Z0 + 16,
3064                 REG_Z0 + 17,
3065                 REG_Z0 + 18,
3066                 REG_Z0 + 19,
3067                 REG_Z0 + 20,
3068                 REG_Z0 + 21,
3069                 REG_Z0 + 22,
3070                 REG_Z0 + 23,
3071                 REG_Z0 + 24,
3072                 REG_Z0 + 25,
3073                 REG_Z0 + 26,
3074                 REG_Z0 + 27,
3075                 REG_Z0 + 28,
3076                 REG_Z0 + 29,
3077                 REG_Z0 + 30,
3078                 REG_Z0 + 31:
3079                 if ctxt.Arch.Family == sys.I386 {
3080                         return Yxxx
3081                 }
3082                 return Yzr
3083
3084         case REG_K0:
3085                 return Yk0
3086
3087         case REG_K0 + 1,
3088                 REG_K0 + 2,
3089                 REG_K0 + 3,
3090                 REG_K0 + 4,
3091                 REG_K0 + 5,
3092                 REG_K0 + 6,
3093                 REG_K0 + 7:
3094                 return Yknot0
3095
3096         case REG_CS:
3097                 return Ycs
3098         case REG_SS:
3099                 return Yss
3100         case REG_DS:
3101                 return Yds
3102         case REG_ES:
3103                 return Yes
3104         case REG_FS:
3105                 return Yfs
3106         case REG_GS:
3107                 return Ygs
3108         case REG_TLS:
3109                 return Ytls
3110
3111         case REG_GDTR:
3112                 return Ygdtr
3113         case REG_IDTR:
3114                 return Yidtr
3115         case REG_LDTR:
3116                 return Yldtr
3117         case REG_MSW:
3118                 return Ymsw
3119         case REG_TASK:
3120                 return Ytask
3121
3122         case REG_CR + 0:
3123                 return Ycr0
3124         case REG_CR + 1:
3125                 return Ycr1
3126         case REG_CR + 2:
3127                 return Ycr2
3128         case REG_CR + 3:
3129                 return Ycr3
3130         case REG_CR + 4:
3131                 return Ycr4
3132         case REG_CR + 5:
3133                 return Ycr5
3134         case REG_CR + 6:
3135                 return Ycr6
3136         case REG_CR + 7:
3137                 return Ycr7
3138         case REG_CR + 8:
3139                 return Ycr8
3140
3141         case REG_DR + 0:
3142                 return Ydr0
3143         case REG_DR + 1:
3144                 return Ydr1
3145         case REG_DR + 2:
3146                 return Ydr2
3147         case REG_DR + 3:
3148                 return Ydr3
3149         case REG_DR + 4:
3150                 return Ydr4
3151         case REG_DR + 5:
3152                 return Ydr5
3153         case REG_DR + 6:
3154                 return Ydr6
3155         case REG_DR + 7:
3156                 return Ydr7
3157
3158         case REG_TR + 0:
3159                 return Ytr0
3160         case REG_TR + 1:
3161                 return Ytr1
3162         case REG_TR + 2:
3163                 return Ytr2
3164         case REG_TR + 3:
3165                 return Ytr3
3166         case REG_TR + 4:
3167                 return Ytr4
3168         case REG_TR + 5:
3169                 return Ytr5
3170         case REG_TR + 6:
3171                 return Ytr6
3172         case REG_TR + 7:
3173                 return Ytr7
3174         }
3175
3176         return Yxxx
3177 }
3178
// AsmBuf is a simple buffer to assemble variable-length x86 instructions into
// and hold assembly state.
//
// Bytes are appended at index off of the fixed-size array buf; the Put*
// methods do no capacity checking of their own beyond Go's array bounds
// checks.
type AsmBuf struct {
	buf      [100]byte // Backing storage for the bytes emitted so far
	off      int       // Number of bytes of buf in use; index of the next write
	rexflag  int       // Bits OR-ed in by asmandsz from regrex and its rex argument
	vexflag  bool      // Per inst: true for VEX-encoded
	evexflag bool      // Per inst: true for EVEX-encoded
	rep      bool      // NOTE(review): presumably a pending REP prefix; set outside this chunk
	repn     bool      // NOTE(review): presumably a pending REPN prefix; set outside this chunk
	lock     bool      // NOTE(review): presumably a pending LOCK prefix; set outside this chunk

	evex evexBits // Initialized when evexflag is true
}
3193
3194 // Put1 appends one byte to the end of the buffer.
3195 func (ab *AsmBuf) Put1(x byte) {
3196         ab.buf[ab.off] = x
3197         ab.off++
3198 }
3199
3200 // Put2 appends two bytes to the end of the buffer.
3201 func (ab *AsmBuf) Put2(x, y byte) {
3202         ab.buf[ab.off+0] = x
3203         ab.buf[ab.off+1] = y
3204         ab.off += 2
3205 }
3206
3207 // Put3 appends three bytes to the end of the buffer.
3208 func (ab *AsmBuf) Put3(x, y, z byte) {
3209         ab.buf[ab.off+0] = x
3210         ab.buf[ab.off+1] = y
3211         ab.buf[ab.off+2] = z
3212         ab.off += 3
3213 }
3214
3215 // Put4 appends four bytes to the end of the buffer.
3216 func (ab *AsmBuf) Put4(x, y, z, w byte) {
3217         ab.buf[ab.off+0] = x
3218         ab.buf[ab.off+1] = y
3219         ab.buf[ab.off+2] = z
3220         ab.buf[ab.off+3] = w
3221         ab.off += 4
3222 }
3223
3224 // PutInt16 writes v into the buffer using little-endian encoding.
3225 func (ab *AsmBuf) PutInt16(v int16) {
3226         ab.buf[ab.off+0] = byte(v)
3227         ab.buf[ab.off+1] = byte(v >> 8)
3228         ab.off += 2
3229 }
3230
3231 // PutInt32 writes v into the buffer using little-endian encoding.
3232 func (ab *AsmBuf) PutInt32(v int32) {
3233         ab.buf[ab.off+0] = byte(v)
3234         ab.buf[ab.off+1] = byte(v >> 8)
3235         ab.buf[ab.off+2] = byte(v >> 16)
3236         ab.buf[ab.off+3] = byte(v >> 24)
3237         ab.off += 4
3238 }
3239
3240 // PutInt64 writes v into the buffer using little-endian encoding.
3241 func (ab *AsmBuf) PutInt64(v int64) {
3242         ab.buf[ab.off+0] = byte(v)
3243         ab.buf[ab.off+1] = byte(v >> 8)
3244         ab.buf[ab.off+2] = byte(v >> 16)
3245         ab.buf[ab.off+3] = byte(v >> 24)
3246         ab.buf[ab.off+4] = byte(v >> 32)
3247         ab.buf[ab.off+5] = byte(v >> 40)
3248         ab.buf[ab.off+6] = byte(v >> 48)
3249         ab.buf[ab.off+7] = byte(v >> 56)
3250         ab.off += 8
3251 }
3252
3253 // Put copies b into the buffer.
3254 func (ab *AsmBuf) Put(b []byte) {
3255         copy(ab.buf[ab.off:], b)
3256         ab.off += len(b)
3257 }
3258
3259 // PutOpBytesLit writes zero terminated sequence of bytes from op,
3260 // starting at specified offset (e.g. z counter value).
3261 // Trailing 0 is not written.
3262 //
3263 // Intended to be used for literal Z cases.
3264 // Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
3265 func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
3266         for int(op[offset]) != 0 {
3267                 ab.Put1(byte(op[offset]))
3268                 offset++
3269         }
3270 }
3271
3272 // Insert inserts b at offset i.
3273 func (ab *AsmBuf) Insert(i int, b byte) {
3274         ab.off++
3275         copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
3276         ab.buf[i] = b
3277 }
3278
3279 // Last returns the byte at the end of the buffer.
3280 func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }
3281
3282 // Len returns the length of the buffer.
3283 func (ab *AsmBuf) Len() int { return ab.off }
3284
3285 // Bytes returns the contents of the buffer.
3286 func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }
3287
3288 // Reset empties the buffer.
3289 func (ab *AsmBuf) Reset() { ab.off = 0 }
3290
3291 // At returns the byte at offset i.
3292 func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }
3293
// asmidx emits SIB byte.
//
// The byte is built in i as scale-code<<6 | index-field<<3 | base-field,
// with the index and base fields looked up in the reg table:
//   - index == REG_NONE encodes index field 4 and skips the scale check;
//   - base == REG_NONE encodes base field 5;
//   - scale 1, 2, 4, 8 map to scale codes 0, 1, 2, 3.
//
// REG_SP is not listed as a valid index. The extended registers
// (R8-R15 and the X/Y/Z registers numbered 8 and above) are rejected when
// ctxt.Arch.Family is sys.I386. For any rejected combination a diagnostic
// is reported through ctxt.Diag and a single zero byte is emitted instead.
func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
	var i int

	// X/Y index register is used in VSIB.
	switch index {
	default:
		goto bad

	case REG_NONE:
		// No index register: scale is irrelevant, go straight to the base.
		i = 4 << 3
		goto bas

	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15,
		REG_X8,
		REG_X9,
		REG_X10,
		REG_X11,
		REG_X12,
		REG_X13,
		REG_X14,
		REG_X15,
		REG_X16,
		REG_X17,
		REG_X18,
		REG_X19,
		REG_X20,
		REG_X21,
		REG_X22,
		REG_X23,
		REG_X24,
		REG_X25,
		REG_X26,
		REG_X27,
		REG_X28,
		REG_X29,
		REG_X30,
		REG_X31,
		REG_Y8,
		REG_Y9,
		REG_Y10,
		REG_Y11,
		REG_Y12,
		REG_Y13,
		REG_Y14,
		REG_Y15,
		REG_Y16,
		REG_Y17,
		REG_Y18,
		REG_Y19,
		REG_Y20,
		REG_Y21,
		REG_Y22,
		REG_Y23,
		REG_Y24,
		REG_Y25,
		REG_Y26,
		REG_Y27,
		REG_Y28,
		REG_Y29,
		REG_Y30,
		REG_Y31,
		REG_Z8,
		REG_Z9,
		REG_Z10,
		REG_Z11,
		REG_Z12,
		REG_Z13,
		REG_Z14,
		REG_Z15,
		REG_Z16,
		REG_Z17,
		REG_Z18,
		REG_Z19,
		REG_Z20,
		REG_Z21,
		REG_Z22,
		REG_Z23,
		REG_Z24,
		REG_Z25,
		REG_Z26,
		REG_Z27,
		REG_Z28,
		REG_Z29,
		REG_Z30,
		REG_Z31:
		// Extended registers are not accepted on 386.
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_BP,
		REG_SI,
		REG_DI,
		REG_X0,
		REG_X1,
		REG_X2,
		REG_X3,
		REG_X4,
		REG_X5,
		REG_X6,
		REG_X7,
		REG_Y0,
		REG_Y1,
		REG_Y2,
		REG_Y3,
		REG_Y4,
		REG_Y5,
		REG_Y6,
		REG_Y7,
		REG_Z0,
		REG_Z1,
		REG_Z2,
		REG_Z3,
		REG_Z4,
		REG_Z5,
		REG_Z6,
		REG_Z7:
		i = reg[index] << 3
	}

	// Scale code goes in the top two bits.
	switch scale {
	default:
		goto bad

	case 1:
		break

	case 2:
		i |= 1 << 6

	case 4:
		i |= 2 << 6

	case 8:
		i |= 3 << 6
	}

bas:
	// Base field goes in the low three bits.
	switch base {
	default:
		goto bad

	case REG_NONE: // must be mod=00
		i |= 5

	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		// Extended registers are not accepted on 386.
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_SP,
		REG_BP,
		REG_SI,
		REG_DI:
		i |= reg[base]
	}

	ab.Put1(byte(i))
	return

bad:
	// Report the problem but still emit one byte in place of the SIB byte.
	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
	ab.Put1(0)
}
3482
3483 func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
3484         var rel obj.Reloc
3485
3486         v := vaddr(ctxt, p, a, &rel)
3487         if rel.Siz != 0 {
3488                 if rel.Siz != 4 {
3489                         ctxt.Diag("bad reloc")
3490                 }
3491                 r := obj.Addrel(cursym)
3492                 *r = rel
3493                 r.Off = int32(p.Pc + int64(ab.Len()))
3494         }
3495
3496         ab.PutInt32(int32(v))
3497 }
3498
3499 func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
3500         if r != nil {
3501                 *r = obj.Reloc{}
3502         }
3503
3504         switch a.Name {
3505         case obj.NAME_STATIC,
3506                 obj.NAME_GOTREF,
3507                 obj.NAME_EXTERN:
3508                 s := a.Sym
3509                 if r == nil {
3510                         ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
3511                         log.Fatalf("reloc")
3512                 }
3513
3514                 if a.Name == obj.NAME_GOTREF {
3515                         r.Siz = 4
3516                         r.Type = objabi.R_GOTPCREL
3517                 } else if useAbs(ctxt, s) {
3518                         r.Siz = 4
3519                         r.Type = objabi.R_ADDR
3520                 } else {
3521                         r.Siz = 4
3522                         r.Type = objabi.R_PCREL
3523                 }
3524
3525                 r.Off = -1 // caller must fill in
3526                 r.Sym = s
3527                 r.Add = a.Offset
3528
3529                 return 0
3530         }
3531
3532         if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
3533                 if r == nil {
3534                         ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
3535                         log.Fatalf("reloc")
3536                 }
3537
3538                 if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
3539                         r.Type = objabi.R_TLS_LE
3540                         r.Siz = 4
3541                         r.Off = -1 // caller must fill in
3542                         r.Add = a.Offset
3543                 }
3544                 return 0
3545         }
3546
3547         return a.Offset
3548 }
3549
// asmandsz emits the addressing bytes for operand a: a first byte laid out
// as mod<<6 | r<<3 | rm, optionally followed by a SIB byte (via asmidx) and
// by an 8-bit or 32-bit displacement.
//
// r is placed in bits 3-5 of the first byte. rex is masked to 0x40|Rxr and
// OR-ed into ab.rexflag together with the regrex bits of the registers
// that take part in the address. If the address needs a relocation, it is
// appended to cursym with its offset set to the position of the 32-bit
// displacement. m64 is not used by this function.
//
// Operands that cannot be encoded are reported with ctxt.Diag.
func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	rex &= 0x40 | Rxr
	if a.Offset != int64(int32(a.Offset)) {
		// The rules are slightly different for 386 and AMD64,
		// mostly for historical reasons. We may unify them later,
		// but it must be discussed beforehand.
		//
		// For 64bit mode only LEAL is allowed to overflow.
		// It's how https://golang.org/cl/59630 made it.
		// crypto/sha1/sha1block_amd64.s depends on this feature.
		//
		// For 32bit mode rules are more permissive.
		// If offset fits uint32, it's permitted.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
			(ctxt.Arch.Family != sys.AMD64 &&
				int64(uint32(a.Offset)) == a.Offset &&
				ab.rexflag&Rxw == 0)
		if !overflowOK {
			ctxt.Diag("offset too large in %s", p)
		}
	}
	// v is the displacement; it is truncated to 32 bits even after the
	// diagnostic above.
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR is always rejected here; the two checks only add a
		// more specific diagnostic before the generic one.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register-direct operand: mod=3, no SIB, no displacement.
		const regFirst = REG_AL
		const regLast = REG_Z31
		if a.Reg < regFirst || regLast < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register (not TLS, not a segment
	// register): always encoded with rm=4 plus a SIB byte.
	if a.Index != REG_NONE && a.Index != REG_TLS && !(REG_CS <= a.Index && a.Index <= REG_GS) {
		// Note: this base shadows the function-level base declared above.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
				goto bad
			}
			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: mod=0 with a 32-bit displacement.
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// No displacement needed. BP and R13 are excluded from this
		// displacement-free form.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// 8-bit displacement, only when no relocation is pending.
		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			ab.Put1(disp8)
			return
		}

		// 32-bit displacement.
		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without a general index register.
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	ab.rexflag |= regrex[base]&Rxb | rex
	// No base register, a segment register, or TLS as base: the address is
	// a bare 32-bit displacement.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			// mod=0, rm=5 followed by a 32-bit displacement.
			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		ab.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 as base are always emitted with a SIB byte
	// (index REG_NONE).
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if disp8, ok := toDisp8(v, p, ab); ok {
			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			ab.Put1(disp8)
			return
		}

		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Any other general-purpose base register: no SIB byte.
	if REG_AX <= base && base <= REG_R15 {
		if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid &&
			ctxt.Headtype != objabi.Hwindows {
			// Move the displacement into an R_TLS_LE relocation addend
			// and encode a zero displacement instead.
			rel = obj.Reloc{}
			rel.Type = objabi.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// No displacement needed. BP and R13 are excluded from this
		// displacement-free form.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
			return
		}

		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

putrelv:
	// Emit the 32-bit displacement, registering the pending relocation (if
	// any) at the displacement's position first.
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(ab.Len()))
	}

	ab.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
}
3763
3764 func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
3765         ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
3766 }
3767
3768 func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
3769         ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
3770 }
3771
3772 func bytereg(a *obj.Addr, t *uint8) {
3773         if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
3774                 a.Reg += REG_AL - REG_AX
3775                 *t = 0
3776         }
3777 }
3778
3779 func unbytereg(a *obj.Addr, t *uint8) {
3780         if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
3781                 a.Reg += REG_AX - REG_AL
3782                 *t = 0
3783         }
3784 }
3785
// Encoding kinds used in the fifth field of ymovtab entries.
// The values are consecutive uint8s starting at 0.
// NOTE(review): the per-kind notes below are inferred from how ymovtab
// uses each constant; the code that interprets them is outside this chunk.
const (
	movLit uint8 = iota // Like Zlit
	movRegMem           // used for special-register source entries (e.g. segment register to Yml)
	movMemReg           // used for special-register destination entries (e.g. Yml to segment register)
	movRegMem2op        // like movRegMem, for entries whose op bytes start with 0x0f
	movMemReg2op        // like movMemReg, for entries whose op bytes start with 0x0f
	movFullPtr          // Load full pointer, trash heap (unsupported)
	movDoubleShift      // used for the three-operand SHL/SHR entries
	movTLSReg           // used for the "load TLS base" entries
)
3796
// ymovtab lists instruction forms that involve special registers (segment,
// control, debug, test, descriptor-table, task and TLS registers), plus the
// double-shift forms.
//
// Each entry holds, in order: the instruction, three operand classes, one
// of the mov* encoding kinds, and up to four op bytes.
// NOTE(review): judging by the entries, the three classes are the source,
// middle and destination operands (push has only the first, pop only the
// last, double shift all three) — confirm against the movtab definition.
// The final all-zero entry is presumably an end-of-table sentinel; confirm
// against the lookup loop.
var ymovtab = []movtab{
	// push
	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
	{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},

	// pop
	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},

	// mov seg
	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},

	// mov cr
	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},

	// mov dr
	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},

	// mov tr
	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},

	// lgdt, sgdt, lidt, sidt
	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},

	// lldt, sldt
	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},

	// lmsw, smsw
	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},

	// ltr, str
	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
	{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
	*/

	// double shift
	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},

	// load TLS base
	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}
3940
3941 func isax(a *obj.Addr) bool {
3942         switch a.Reg {
3943         case REG_AX, REG_AL, REG_AH:
3944                 return true
3945         }
3946
3947         return a.Index == REG_AX
3948 }
3949
3950 func subreg(p *obj.Prog, from int, to int) {
3951         if false { /* debug['Q'] */
3952                 fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
3953         }
3954
3955         if int(p.From.Reg) == from {
3956                 p.From.Reg = int16(to)
3957                 p.Ft = 0
3958         }
3959
3960         if int(p.To.Reg) == from {
3961                 p.To.Reg = int16(to)
3962                 p.Tt = 0
3963         }
3964
3965         if int(p.From.Index) == from {
3966                 p.From.Index = int16(to)
3967                 p.Ft = 0
3968         }
3969
3970         if int(p.To.Index) == from {
3971                 p.To.Index = int16(to)
3972                 p.Tt = 0
3973         }
3974
3975         if false { /* debug['Q'] */
3976                 fmt.Printf("%v\n", p)
3977         }
3978 }
3979
3980 func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
3981         switch op {
3982         case Pm, Pe, Pf2, Pf3:
3983                 if osize != 1 {
3984                         if op != Pm {
3985                                 ab.Put1(byte(op))
3986                         }
3987                         ab.Put1(Pm)
3988                         z++
3989                         op = int(o.op[z])
3990                         break
3991                 }
3992                 fallthrough
3993
3994         default:
3995                 if ab.Len() == 0 || ab.Last() != Pm {
3996                         ab.Put1(Pm)
3997                 }
3998         }
3999
4000         ab.Put1(byte(op))
4001         return z
4002 }
4003
// Pre-encoded 64-bit instruction sequences that operate on the BP register.
// The instruction encoded by each run of bytes is shown next to it.
// NOTE(review): the names suggest these bracket Duff's-device calls; the use
// sites are outside this section — confirm.
var (
	// bpduff1 stores BP at -16(SP) and then points BP at that slot.
	bpduff1 = []byte{
		0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
		0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
	}

	// bpduff2 reloads BP from the address it currently points to.
	bpduff2 = []byte{
		0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
	}
)
4012
4013 // asmevex emits EVEX pregis and opcode byte.
4014 // In addition to asmvex r/m, vvvv and reg fields also requires optional
4015 // K-masking register.
4016 //
4017 // Expects asmbuf.evex to be properly initialized.
4018 func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
4019         ab.evexflag = true
4020         evex := ab.evex
4021
4022         rexR := byte(1)
4023         evexR := byte(1)
4024         rexX := byte(1)
4025         rexB := byte(1)
4026         if r != nil {
4027                 if regrex[r.Reg]&Rxr != 0 {
4028                         rexR = 0 // "ModR/M.reg" selector 4th bit.
4029                 }
4030                 if regrex[r.Reg]&RxrEvex != 0 {
4031                         evexR = 0 // "ModR/M.reg" selector 5th bit.
4032                 }
4033         }
4034         if rm != nil {
4035                 if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
4036                         rexX = 0
4037                 } else if regrex[rm.Index]&Rxx != 0 {
4038                         rexX = 0
4039                 }
4040                 if regrex[rm.Reg]&Rxb != 0 {
4041                         rexB = 0
4042                 }
4043         }
4044         // P0 = [R][X][B][R'][00][mm]
4045         p0 := (rexR << 7) |
4046                 (rexX << 6) |
4047                 (rexB << 5) |
4048                 (evexR << 4) |
4049                 (0 << 2) |
4050                 (evex.M() << 0)
4051
4052         vexV := byte(0)
4053         if v != nil {
4054                 // 4bit-wide reg index.
4055                 vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
4056         }
4057         vexV ^= 0x0F
4058         // P1 = [W][vvvv][1][pp]
4059         p1 := (evex.W() << 7) |
4060                 (vexV << 3) |
4061                 (1 << 2) |
4062                 (evex.P() << 0)
4063
4064         suffix := evexSuffixMap[p.Scond]
4065         evexZ := byte(0)
4066         evexLL := evex.L()
4067         evexB := byte(0)
4068         evexV := byte(1)
4069         evexA := byte(0)
4070         if suffix.zeroing {
4071                 if !evex.ZeroingEnabled() {
4072                         ctxt.Diag("unsupported zeroing: %v", p)
4073                 }
4074                 if k == nil {
4075                         // When you request zeroing you must specify a mask register.
4076                         // See issue 57952.
4077                         ctxt.Diag("mask register must be specified for .Z instructions: %v", p)
4078                 } else if k.Reg == REG_K0 {
4079                         // The mask register must not be K0. That restriction is already
4080                         // handled by the Yknot0 restriction in the opcode tables, so we
4081                         // won't ever reach here. But put something sensible here just in case.
4082                         ctxt.Diag("mask register must not be K0 for .Z instructions: %v", p)
4083                 }
4084                 evexZ = 1
4085         }
4086         switch {
4087         case suffix.rounding != rcUnset:
4088                 if rm != nil && rm.Type == obj.TYPE_MEM {
4089                         ctxt.Diag("illegal rounding with memory argument: %v", p)
4090                 } else if !evex.RoundingEnabled() {
4091                         ctxt.Diag("unsupported rounding: %v", p)
4092                 }
4093                 evexB = 1
4094                 evexLL = suffix.rounding
4095         case suffix.broadcast:
4096                 if rm == nil || rm.Type != obj.TYPE_MEM {
4097                         ctxt.Diag("illegal broadcast without memory argument: %v", p)
4098                 } else if !evex.BroadcastEnabled() {
4099                         ctxt.Diag("unsupported broadcast: %v", p)
4100                 }
4101                 evexB = 1
4102         case suffix.sae:
4103                 if rm != nil && rm.Type == obj.TYPE_MEM {
4104                         ctxt.Diag("illegal SAE with memory argument: %v", p)
4105                 } else if !evex.SaeEnabled() {
4106                         ctxt.Diag("unsupported SAE: %v", p)
4107                 }
4108                 evexB = 1
4109         }
4110         if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
4111                 evexV = 0
4112         } else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
4113                 evexV = 0 // VSR selector 5th bit.
4114         }
4115         if k != nil {
4116                 evexA = byte(reg[k.Reg])
4117         }
4118         // P2 = [z][L'L][b][V'][aaa]
4119         p2 := (evexZ << 7) |
4120                 (evexLL << 5) |
4121                 (evexB << 4) |
4122                 (evexV << 3) |
4123                 (evexA << 0)
4124
4125         const evexEscapeByte = 0x62
4126         ab.Put4(evexEscapeByte, p0, p1, p2)
4127         ab.Put1(evex.opcode)
4128 }
4129
4130 // Emit VEX prefix and opcode byte.
4131 // The three addresses are the r/m, vvvv, and reg fields.
4132 // The reg and rm arguments appear in the same order as the
4133 // arguments to asmand, which typically follows the call to asmvex.
4134 // The final two arguments are the VEX prefix (see encoding above)
4135 // and the opcode byte.
4136 // For details about vex prefix see:
4137 // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
4138 func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
4139         ab.vexflag = true
4140         rexR := 0
4141         if r != nil {
4142                 rexR = regrex[r.Reg] & Rxr
4143         }
4144         rexB := 0
4145         rexX := 0
4146         if rm != nil {
4147                 rexB = regrex[rm.Reg] & Rxb
4148                 rexX = regrex[rm.Index] & Rxx
4149         }
4150         vexM := (vex >> 3) & 0x7
4151         vexWLP := vex & 0x87
4152         vexV := byte(0)
4153         if v != nil {
4154                 vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
4155         }
4156         vexV ^= 0xF
4157         if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
4158                 // Can use 2-byte encoding.
4159                 ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
4160         } else {
4161                 // Must use 3-byte encoding.
4162                 ab.Put3(0xc4,
4163                         (byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
4164                         vexV<<3|vexWLP,
4165                 )
4166         }
4167         ab.Put1(opcode)
4168 }
4169
4170 // regIndex returns register index that fits in 5 bits.
4171 //
4172 //      R         : 3 bit | legacy instructions     | N/A
4173 //      [R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
4174 //      EVEX.R    : 1 bit | EVEX extension bit      | RxrEvex
4175 //
4176 // Examples:
4177 //
4178 //      REG_Z30 => 30
4179 //      REG_X15 => 15
4180 //      REG_R9  => 9
4181 //      REG_AX  => 0
4182 func regIndex(r int16) int {
4183         lower3bits := reg[r]
4184         high4bit := regrex[r] & Rxr << 1
4185         high5bit := regrex[r] & RxrEvex << 0
4186         return lower3bits | high4bit | high5bit
4187 }
4188
4189 // avx2gatherValid reports whether p satisfies AVX2 gather constraints.
4190 // Reports errors via ctxt.
4191 func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
4192         // If any pair of the index, mask, or destination registers
4193         // are the same, illegal instruction trap (#UD) is triggered.
4194         index := regIndex(p.GetFrom3().Index)
4195         mask := regIndex(p.From.Reg)
4196         dest := regIndex(p.To.Reg)
4197         if dest == mask || dest == index || mask == index {
4198                 ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
4199                 return false
4200         }
4201
4202         return true
4203 }
4204
4205 // avx512gatherValid reports whether p satisfies AVX512 gather constraints.
4206 // Reports errors via ctxt.
4207 func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
4208         // Illegal instruction trap (#UD) is triggered if the destination vector
4209         // register is the same as index vector in VSIB.
4210         index := regIndex(p.From.Index)
4211         dest := regIndex(p.To.Reg)
4212         if dest == index {
4213                 ctxt.Diag("index and destination registers should be distinct: %v", p)
4214                 return false
4215         }
4216
4217         return true
4218 }
4219
4220 func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
4221         o := opindex[p.As&obj.AMask]
4222
4223         if o == nil {
4224                 ctxt.Diag("asmins: missing op %v", p)
4225                 return
4226         }
4227
4228         if pre := prefixof(ctxt, &p.From); pre != 0 {
4229                 ab.Put1(byte(pre))
4230         }
4231         if pre := prefixof(ctxt, &p.To); pre != 0 {
4232                 ab.Put1(byte(pre))
4233         }
4234
4235         // Checks to warn about instruction/arguments combinations that
4236         // will unconditionally trigger illegal instruction trap (#UD).
4237         switch p.As {
4238         case AVGATHERDPD,
4239                 AVGATHERQPD,
4240                 AVGATHERDPS,
4241                 AVGATHERQPS,
4242                 AVPGATHERDD,
4243                 AVPGATHERQD,
4244                 AVPGATHERDQ,
4245                 AVPGATHERQQ:
4246                 if p.GetFrom3() == nil {
4247                         // gathers need a 3rd arg. See issue 58822.
4248                         ctxt.Diag("need a third arg for gather instruction: %v", p)
4249                         return
4250                 }
4251                 // AVX512 gather requires explicit K mask.
4252                 if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
4253                         if !avx512gatherValid(ctxt, p) {
4254                                 return
4255                         }
4256                 } else {
4257                         if !avx2gatherValid(ctxt, p) {
4258                                 return
4259                         }
4260                 }
4261         }
4262
4263         if p.Ft == 0 {
4264                 p.Ft = uint8(oclass(ctxt, p, &p.From))
4265         }
4266         if p.Tt == 0 {
4267                 p.Tt = uint8(oclass(ctxt, p, &p.To))
4268         }
4269
4270         ft := int(p.Ft) * Ymax
4271         var f3t int
4272         tt := int(p.Tt) * Ymax
4273
4274         xo := obj.Bool2int(o.op[0] == 0x0f)
4275         z := 0
4276         var a *obj.Addr
4277         var l int
4278         var op int
4279         var q *obj.Prog
4280         var r *obj.Reloc
4281         var rel obj.Reloc
4282         var v int64
4283
4284         args := make([]int, 0, argListMax)
4285         if ft != Ynone*Ymax {
4286                 args = append(args, ft)
4287         }
4288         for i := range p.RestArgs {
4289                 args = append(args, oclass(ctxt, p, &p.RestArgs[i].Addr)*Ymax)
4290         }
4291         if tt != Ynone*Ymax {
4292                 args = append(args, tt)
4293         }
4294
4295         for _, yt := range o.ytab {
4296                 // ytab matching is purely args-based,
4297                 // but AVX512 suffixes like "Z" or "RU_SAE" will
4298                 // add EVEX-only filter that will reject non-EVEX matches.
4299                 //
4300                 // Consider "VADDPD.BCST 2032(DX), X0, X0".
4301                 // Without this rule, operands will lead to VEX-encoded form
4302                 // and produce "c5b15813" encoding.
4303                 if !yt.match(args) {
4304                         // "xo" is always zero for VEX/EVEX encoded insts.
4305                         z += int(yt.zoffset) + xo
4306                 } else {
4307                         if p.Scond != 0 && !evexZcase(yt.zcase) {
4308                                 // Do not signal error and continue to search
4309                                 // for matching EVEX-encoded form.
4310                                 z += int(yt.zoffset)
4311                                 continue
4312                         }
4313
4314                         switch o.prefix {
4315                         case Px1: // first option valid only in 32-bit mode
4316                                 if ctxt.Arch.Family == sys.AMD64 && z == 0 {
4317                                         z += int(yt.zoffset) + xo
4318                                         continue
4319                                 }
4320                         case Pq: // 16 bit escape and opcode escape
4321                                 ab.Put2(Pe, Pm)
4322
4323                         case Pq3: // 16 bit escape and opcode escape + REX.W
4324                                 ab.rexflag |= Pw
4325                                 ab.Put2(Pe, Pm)
4326
4327                         case Pq4: // 66 0F 38
4328                                 ab.Put3(0x66, 0x0F, 0x38)
4329
4330                         case Pq4w: // 66 0F 38 + REX.W
4331                                 ab.rexflag |= Pw
4332                                 ab.Put3(0x66, 0x0F, 0x38)
4333
4334                         case Pq5: // F3 0F 38
4335                                 ab.Put3(0xF3, 0x0F, 0x38)
4336
4337                         case Pq5w: //  F3 0F 38 + REX.W
4338                                 ab.rexflag |= Pw
4339                                 ab.Put3(0xF3, 0x0F, 0x38)
4340
4341                         case Pf2, // xmm opcode escape
4342                                 Pf3:
4343                                 ab.Put2(o.prefix, Pm)
4344
4345                         case Pef3:
4346                                 ab.Put3(Pe, Pf3, Pm)
4347
4348                         case Pfw: // xmm opcode escape + REX.W
4349                                 ab.rexflag |= Pw
4350                                 ab.Put2(Pf3, Pm)
4351
4352                         case Pm: // opcode escape
4353                                 ab.Put1(Pm)
4354
4355                         case Pe: // 16 bit escape
4356                                 ab.Put1(Pe)
4357
4358                         case Pw: // 64-bit escape
4359                                 if ctxt.Arch.Family != sys.AMD64 {
4360                                         ctxt.Diag("asmins: illegal 64: %v", p)
4361                                 }
4362                                 ab.rexflag |= Pw
4363
4364                         case Pw8: // 64-bit escape if z >= 8
4365                                 if z >= 8 {
4366                                         if ctxt.Arch.Family != sys.AMD64 {
4367                                                 ctxt.Diag("asmins: illegal 64: %v", p)
4368                                         }
4369                                         ab.rexflag |= Pw
4370                                 }
4371
4372                         case Pb: // botch
4373                                 if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
4374                                         goto bad
4375                                 }
4376                                 // NOTE(rsc): This is probably safe to do always,
4377                                 // but when enabled it chooses different encodings
4378                                 // than the old cmd/internal/obj/i386 code did,
4379                                 // which breaks our "same bits out" checks.
4380                                 // In particular, CMPB AX, $0 encodes as 80 f8 00
4381                                 // in the original obj/i386, and it would encode
4382                                 // (using a valid, shorter form) as 3c 00 if we enabled
4383                                 // the call to bytereg here.
4384                                 if ctxt.Arch.Family == sys.AMD64 {
4385                                         bytereg(&p.From, &p.Ft)
4386                                         bytereg(&p.To, &p.Tt)
4387                                 }
4388
4389                         case P32: // 32 bit but illegal if 64-bit mode
4390                                 if ctxt.Arch.Family == sys.AMD64 {
4391                                         ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
4392                                 }
4393
4394                         case Py: // 64-bit only, no prefix
4395                                 if ctxt.Arch.Family != sys.AMD64 {
4396                                         ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
4397                                 }
4398
4399                         case Py1: // 64-bit only if z < 1, no prefix
4400                                 if z < 1 && ctxt.Arch.Family != sys.AMD64 {
4401                                         ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
4402                                 }
4403
4404                         case Py3: // 64-bit only if z < 3, no prefix
4405                                 if z < 3 && ctxt.Arch.Family != sys.AMD64 {
4406                                         ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
4407                                 }
4408                         }
4409
4410                         if z >= len(o.op) {
4411                                 log.Fatalf("asmins bad table %v", p)
4412                         }
4413                         op = int(o.op[z])
4414                         if op == 0x0f {
4415                                 ab.Put1(byte(op))
4416                                 z++
4417                                 op = int(o.op[z])
4418                         }
4419
4420                         switch yt.zcase {
4421                         default:
4422                                 ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
4423                                 return
4424
4425                         case Zpseudo:
4426                                 break
4427
4428                         case Zlit:
4429                                 ab.PutOpBytesLit(z, &o.op)
4430
4431                         case Zlitr_m:
4432                                 ab.PutOpBytesLit(z, &o.op)
4433                                 ab.asmand(ctxt, cursym, p, &p.To, &p.From)
4434
4435                         case Zlitm_r:
4436                                 ab.PutOpBytesLit(z, &o.op)
4437                                 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4438
4439                         case Zlit_m_r:
4440                                 ab.PutOpBytesLit(z, &o.op)
4441                                 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
4442
4443                         case Zmb_r:
4444                                 bytereg(&p.From, &p.Ft)
4445                                 fallthrough
4446
4447                         case Zm_r:
4448                                 ab.Put1(byte(op))
4449                                 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4450
4451                         case Z_m_r:
4452                                 ab.Put1(byte(op))
4453                                 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
4454
4455                         case Zm2_r:
4456                                 ab.Put2(byte(op), o.op[z+1])
4457                                 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4458
4459                         case Zm_r_xm:
4460                                 ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
4461                                 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4462
4463                         case Zm_r_xm_nr:
4464                                 ab.rexflag = 0
4465                                 ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
4466                                 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4467
4468                         case Zm_r_i_xm:
4469                                 ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
4470                                 ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
4471                                 ab.Put1(byte(p.To.Offset))
4472
4473                         case Zibm_r, Zibr_m:
4474                                 ab.PutOpBytesLit(z, &o.op)
4475                                 if yt.zcase == Zibr_m {
4476                                         ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
4477                                 } else {
4478                                         ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
4479                                 }
4480                                 switch {
4481                                 default:
4482                                         ab.Put1(byte(p.From.Offset))
4483                                 case yt.args[0] == Yi32 && o.prefix == Pe:
4484                                         ab.PutInt16(int16(p.From.Offset))
4485                                 case yt.args[0] == Yi32:
4486                                         ab.PutInt32(int32(p.From.Offset))
4487                                 }
4488
4489                         case Zaut_r:
4490                                 ab.Put1(0x8d) // leal
4491                                 if p.From.Type != obj.TYPE_ADDR {
4492                                         ctxt.Diag("asmins: Zaut sb type ADDR")
4493                                 }
4494                                 p.From.Type = obj.TYPE_MEM
4495                                 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4496                                 p.From.Type = obj.TYPE_ADDR
4497
4498                         case Zm_o:
4499                                 ab.Put1(byte(op))
4500                                 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
4501
4502                         case Zr_m:
4503                                 ab.Put1(byte(op))
4504                                 ab.asmand(ctxt, cursym, p, &p.To, &p.From)
4505
4506                         case Zvex:
4507                                 ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
4508
4509                         case Zvex_rm_v_r:
4510                                 ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
4511                                 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4512
4513                         case Zvex_rm_v_ro:
4514                                 ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
4515                                 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
4516
4517                         case Zvex_i_rm_vo:
4518                                 ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
4519                                 ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
4520                                 ab.Put1(byte(p.From.Offset))
4521
4522                         case Zvex_i_r_v:
4523                                 ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
4524                                 regnum := byte(0x7)
4525                                 if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
4526                                         regnum &= byte(p.GetFrom3().Reg - REG_X0)
4527                                 } else {
4528                                         regnum &= byte(p.GetFrom3().Reg - REG_Y0)
4529                                 }
4530                                 ab.Put1(o.op[z+2] | regnum)
4531                                 ab.Put1(byte(p.From.Offset))
4532
4533                         case Zvex_i_rm_v_r:
4534                                 imm, from, from3, to := unpackOps4(p)
4535                                 ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
4536                                 ab.asmand(ctxt, cursym, p, from, to)
4537                                 ab.Put1(byte(imm.Offset))
4538
4539                         case Zvex_i_rm_r:
4540                                 ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
4541                                 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
4542                                 ab.Put1(byte(p.From.Offset))
4543
4544                         case Zvex_v_rm_r:
4545                                 ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
4546                                 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
4547
4548                         case Zvex_r_v_rm:
4549                                 ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
4550                                 ab.asmand(ctxt, cursym, p, &p.To, &p.From)
4551
4552                         case Zvex_rm_r_vo:
4553                                 ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
4554                                 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
4555
4556                         case Zvex_i_r_rm:
4557                                 ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
4558                                 ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
4559                                 ab.Put1(byte(p.From.Offset))
4560
4561                         case Zvex_hr_rm_v_r:
4562                                 hr, from, from3, to := unpackOps4(p)
4563                                 ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
4564                                 ab.asmand(ctxt, cursym, p, from, to)
4565                                 ab.Put1(byte(regIndex(hr.Reg) << 4))
4566
4567                         case Zevex_k_rmo:
4568                                 ab.evex = newEVEXBits(z, &o.op)
4569                                 ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
4570                                 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))
4571
4572                         case Zevex_i_rm_vo:
4573                                 ab.evex = newEVEXBits(z, &o.op)
4574                                 ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
4575                                 ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
4576                                 ab.Put1(byte(p.From.Offset))
4577
4578                         case Zevex_i_rm_k_vo:
4579                                 imm, from, kmask, to := unpackOps4(p)
4580                                 ab.evex = newEVEXBits(z, &o.op)
4581                                 ab.asmevex(ctxt, p, from, to, nil, kmask)
4582                                 ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
4583                                 ab.Put1(byte(imm.Offset))
4584
4585                         case Zevex_i_r_rm:
4586                                 ab.evex = newEVEXBits(z, &o.op)
4587                                 ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
4588                                 ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
4589                                 ab.Put1(byte(p.From.Offset))
4590
4591                         case Zevex_i_r_k_rm:
4592                                 imm, from, kmask, to := unpackOps4(p)
4593                                 ab.evex = newEVEXBits(z, &o.op)
4594                                 ab.asmevex(ctxt, p, to, nil, from, kmask)
4595                                 ab.asmand(ctxt, cursym, p, to, from)
4596                                 ab.Put1(byte(imm.Offset))
4597
4598                         case Zevex_i_rm_r:
4599                                 ab.evex = newEVEXBits(z, &o.op)
4600                                 ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
4601                                 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
4602                                 ab.Put1(byte(p.From.Offset))
4603
4604                         case Zevex_i_rm_k_r:
4605                                 imm, from, kmask, to := unpackOps4(p)
4606                                 ab.evex = newEVEXBits(z, &o.op)
4607                                 ab.asmevex(ctxt, p, from, nil, to, kmask)
4608                                 ab.asmand(ctxt, cursym, p, from, to)
4609                                 ab.Put1(byte(imm.Offset))
4610
4611                         case Zevex_i_rm_v_r:
4612                                 imm, from, from3, to := unpackOps4(p)
4613                                 ab.evex = newEVEXBits(z, &o.op)
4614                                 ab.asmevex(ctxt, p, from, from3, to, nil)
4615                                 ab.asmand(ctxt, cursym, p, from, to)
4616                                 ab.Put1(byte(imm.Offset))
4617
4618                         case Zevex_i_rm_v_k_r:
4619                                 imm, from, from3, kmask, to := unpackOps5(p)
4620                                 ab.evex = newEVEXBits(z, &o.op)
4621                                 ab.asmevex(ctxt, p, from, from3, to, kmask)
4622                                 ab.asmand(ctxt, cursym, p, from, to)
4623                                 ab.Put1(byte(imm.Offset))
4624
4625                         case Zevex_r_v_rm:
4626                                 ab.evex = newEVEXBits(z, &o.op)
4627                                 ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
4628                                 ab.asmand(ctxt, cursym, p, &p.To, &p.From)
4629
4630                         case Zevex_rm_v_r:
4631                                 ab.evex = newEVEXBits(z, &o.op)
4632                                 ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
4633                                 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4634
4635                         case Zevex_rm_k_r:
4636                                 ab.evex = newEVEXBits(z, &o.op)
4637                                 ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
4638                                 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4639
4640                         case Zevex_r_k_rm:
4641                                 ab.evex = newEVEXBits(z, &o.op)
4642                                 ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
4643                                 ab.asmand(ctxt, cursym, p, &p.To, &p.From)
4644
4645                         case Zevex_rm_v_k_r:
4646                                 from, from3, kmask, to := unpackOps4(p)
4647                                 ab.evex = newEVEXBits(z, &o.op)
4648                                 ab.asmevex(ctxt, p, from, from3, to, kmask)
4649                                 ab.asmand(ctxt, cursym, p, from, to)
4650
4651                         case Zevex_r_v_k_rm:
4652                                 from, from3, kmask, to := unpackOps4(p)
4653                                 ab.evex = newEVEXBits(z, &o.op)
4654                                 ab.asmevex(ctxt, p, to, from3, from, kmask)
4655                                 ab.asmand(ctxt, cursym, p, to, from)
4656
4657                         case Zr_m_xm:
4658                                 ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
4659                                 ab.asmand(ctxt, cursym, p, &p.To, &p.From)
4660
4661                         case Zr_m_xm_nr:
4662                                 ab.rexflag = 0
4663                                 ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
4664                                 ab.asmand(ctxt, cursym, p, &p.To, &p.From)
4665
4666                         case Zo_m:
4667                                 ab.Put1(byte(op))
4668                                 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
4669
4670                         case Zcallindreg:
4671                                 r = obj.Addrel(cursym)
4672                                 r.Off = int32(p.Pc)
4673                                 r.Type = objabi.R_CALLIND
4674                                 r.Siz = 0
4675                                 fallthrough
4676
4677                         case Zo_m64:
4678                                 ab.Put1(byte(op))
4679                                 ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
4680
4681                         case Zm_ibo:
4682                                 ab.Put1(byte(op))
4683                                 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
4684                                 ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
4685
4686                         case Zibo_m:
4687                                 ab.Put1(byte(op))
4688                                 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
4689                                 ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
4690
4691                         case Zibo_m_xm:
4692                                 z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
4693                                 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
4694                                 ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
4695
4696                         case Z_ib, Zib_:
4697                                 if yt.zcase == Zib_ {
4698                                         a = &p.From
4699                                 } else {
4700                                         a = &p.To
4701                                 }
4702                                 ab.Put1(byte(op))
4703                                 if p.As == AXABORT {
4704                                         ab.Put1(o.op[z+1])
4705                                 }
4706                                 ab.Put1(byte(vaddr(ctxt, p, a, nil)))
4707
4708                         case Zib_rp:
4709                                 ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
4710                                 ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
4711
4712                         case Zil_rp:
4713                                 ab.rexflag |= regrex[p.To.Reg] & Rxb
4714                                 ab.Put1(byte(op + reg[p.To.Reg]))
4715                                 if o.prefix == Pe {
4716                                         v = vaddr(ctxt, p, &p.From, nil)
4717                                         ab.PutInt16(int16(v))
4718                                 } else {
4719                                         ab.relput4(ctxt, cursym, p, &p.From)
4720                                 }
4721
4722                         case Zo_iw:
4723                                 ab.Put1(byte(op))
4724                                 if p.From.Type != obj.TYPE_NONE {
4725                                         v = vaddr(ctxt, p, &p.From, nil)
4726                                         ab.PutInt16(int16(v))
4727                                 }
4728
4729                         case Ziq_rp:
4730                                 v = vaddr(ctxt, p, &p.From, &rel)
4731                                 l = int(v >> 32)
4732                                 if l == 0 && rel.Siz != 8 {
4733                                         ab.rexflag &^= (0x40 | Rxw)
4734
4735                                         ab.rexflag |= regrex[p.To.Reg] & Rxb
4736                                         ab.Put1(byte(0xb8 + reg[p.To.Reg]))
4737                                         if rel.Type != 0 {
4738                                                 r = obj.Addrel(cursym)
4739                                                 *r = rel
4740                                                 r.Off = int32(p.Pc + int64(ab.Len()))
4741                                         }
4742
4743                                         ab.PutInt32(int32(v))
4744                                 } else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend
4745                                         ab.Put1(0xc7)
4746                                         ab.asmando(ctxt, cursym, p, &p.To, 0)
4747
4748                                         ab.PutInt32(int32(v)) // need all 8
4749                                 } else {
4750                                         ab.rexflag |= regrex[p.To.Reg] & Rxb
4751                                         ab.Put1(byte(op + reg[p.To.Reg]))
4752                                         if rel.Type != 0 {
4753                                                 r = obj.Addrel(cursym)
4754                                                 *r = rel
4755                                                 r.Off = int32(p.Pc + int64(ab.Len()))
4756                                         }
4757
4758                                         ab.PutInt64(v)
4759                                 }
4760
4761                         case Zib_rr:
4762                                 ab.Put1(byte(op))
4763                                 ab.asmand(ctxt, cursym, p, &p.To, &p.To)
4764                                 ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
4765
4766                         case Z_il, Zil_:
4767                                 if yt.zcase == Zil_ {
4768                                         a = &p.From
4769                                 } else {
4770                                         a = &p.To
4771                                 }
4772                                 ab.Put1(byte(op))
4773                                 if o.prefix == Pe {
4774                                         v = vaddr(ctxt, p, a, nil)
4775                                         ab.PutInt16(int16(v))
4776                                 } else {
4777                                         ab.relput4(ctxt, cursym, p, a)
4778                                 }
4779
4780                         case Zm_ilo, Zilo_m:
4781                                 ab.Put1(byte(op))
4782                                 if yt.zcase == Zilo_m {
4783                                         a = &p.From
4784                                         ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
4785                                 } else {
4786                                         a = &p.To
4787                                         ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
4788                                 }
4789
4790                                 if o.prefix == Pe {
4791                                         v = vaddr(ctxt, p, a, nil)
4792                                         ab.PutInt16(int16(v))
4793                                 } else {
4794                                         ab.relput4(ctxt, cursym, p, a)
4795                                 }
4796
4797                         case Zil_rr:
4798                                 ab.Put1(byte(op))
4799                                 ab.asmand(ctxt, cursym, p, &p.To, &p.To)
4800                                 if o.prefix == Pe {
4801                                         v = vaddr(ctxt, p, &p.From, nil)
4802                                         ab.PutInt16(int16(v))
4803                                 } else {
4804                                         ab.relput4(ctxt, cursym, p, &p.From)
4805                                 }
4806
4807                         case Z_rp:
4808                                 ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
4809                                 ab.Put1(byte(op + reg[p.To.Reg]))
4810
4811                         case Zrp_:
4812                                 ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
4813                                 ab.Put1(byte(op + reg[p.From.Reg]))
4814
4815                         case Zcallcon, Zjmpcon:
4816                                 if yt.zcase == Zcallcon {
4817                                         ab.Put1(byte(op))
4818                                 } else {
4819                                         ab.Put1(o.op[z+1])
4820                                 }
4821                                 r = obj.Addrel(cursym)
4822                                 r.Off = int32(p.Pc + int64(ab.Len()))
4823                                 r.Type = objabi.R_PCREL
4824                                 r.Siz = 4
4825                                 r.Add = p.To.Offset
4826                                 ab.PutInt32(0)
4827
4828                         case Zcallind:
4829                                 ab.Put2(byte(op), o.op[z+1])
4830                                 r = obj.Addrel(cursym)
4831                                 r.Off = int32(p.Pc + int64(ab.Len()))
4832                                 if ctxt.Arch.Family == sys.AMD64 {
4833                                         r.Type = objabi.R_PCREL
4834                                 } else {
4835                                         r.Type = objabi.R_ADDR
4836                                 }
4837                                 r.Siz = 4
4838                                 r.Add = p.To.Offset
4839                                 r.Sym = p.To.Sym
4840                                 ab.PutInt32(0)
4841
4842                         case Zcall, Zcallduff:
4843                                 if p.To.Sym == nil {
4844                                         ctxt.Diag("call without target")
4845                                         ctxt.DiagFlush()
4846                                         log.Fatalf("bad code")
4847                                 }
4848
4849                                 if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
4850                                         ctxt.Diag("directly calling duff when dynamically linking Go")
4851                                 }
4852
4853                                 if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
4854                                         // Maintain BP around call, since duffcopy/duffzero can't do it
4855                                         // (the call jumps into the middle of the function).
4856                                         // This makes it possible to see call sites for duffcopy/duffzero in
4857                                         // BP-based profiling tools like Linux perf (which is the
4858                                         // whole point of maintaining frame pointers in Go).
4859                                         // MOVQ BP, -16(SP)
4860                                         // LEAQ -16(SP), BP
4861                                         ab.Put(bpduff1)
4862                                 }
4863                                 ab.Put1(byte(op))
4864                                 r = obj.Addrel(cursym)
4865                                 r.Off = int32(p.Pc + int64(ab.Len()))
4866                                 r.Sym = p.To.Sym
4867                                 r.Add = p.To.Offset
4868                                 r.Type = objabi.R_CALL
4869                                 r.Siz = 4
4870                                 ab.PutInt32(0)
4871
4872                                 if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
4873                                         // Pop BP pushed above.
4874                                         // MOVQ 0(BP), BP
4875                                         ab.Put(bpduff2)
4876                                 }
4877
4878                         // TODO: jump across functions needs reloc
4879                         case Zbr, Zjmp, Zloop:
4880                                 if p.As == AXBEGIN {
4881                                         ab.Put1(byte(op))
4882                                 }
4883                                 if p.To.Sym != nil {
4884                                         if yt.zcase != Zjmp {
4885                                                 ctxt.Diag("branch to ATEXT")
4886                                                 ctxt.DiagFlush()
4887                                                 log.Fatalf("bad code")
4888                                         }
4889
4890                                         ab.Put1(o.op[z+1])
4891                                         r = obj.Addrel(cursym)
4892                                         r.Off = int32(p.Pc + int64(ab.Len()))
4893                                         r.Sym = p.To.Sym
4894                                         // Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
4895                                         // it can point to a trampoline instead of the destination itself.
4896                                         r.Type = objabi.R_CALL
4897                                         r.Siz = 4
4898                                         ab.PutInt32(0)
4899                                         break
4900                                 }
4901
4902                                 // Assumes q is in this function.
4903                                 // TODO: Check in input, preserve in brchain.
4904
4905                                 // Fill in backward jump now.
4906                                 q = p.To.Target()
4907
4908                                 if q == nil {
4909                                         ctxt.Diag("jmp/branch/loop without target")
4910                                         ctxt.DiagFlush()
4911                                         log.Fatalf("bad code")
4912                                 }
4913
4914                                 if p.Back&branchBackwards != 0 {
4915                                         v = q.Pc - (p.Pc + 2)
4916                                         if v >= -128 && p.As != AXBEGIN {
4917                                                 if p.As == AJCXZL {
4918                                                         ab.Put1(0x67)
4919                                                 }
4920                                                 ab.Put2(byte(op), byte(v))
4921                                         } else if yt.zcase == Zloop {
4922                                                 ctxt.Diag("loop too far: %v", p)
4923                                         } else {
4924                                                 v -= 5 - 2
4925                                                 if p.As == AXBEGIN {
4926                                                         v--
4927                                                 }
4928                                                 if yt.zcase == Zbr {
4929                                                         ab.Put1(0x0f)
4930                                                         v--
4931                                                 }
4932
4933                                                 ab.Put1(o.op[z+1])
4934                                                 ab.PutInt32(int32(v))
4935                                         }
4936
4937                                         break
4938                                 }
4939
4940                                 // Annotate target; will fill in later.
4941                                 p.Forwd = q.Rel
4942
4943                                 q.Rel = p
4944                                 if p.Back&branchShort != 0 && p.As != AXBEGIN {
4945                                         if p.As == AJCXZL {
4946                                                 ab.Put1(0x67)
4947                                         }
4948                                         ab.Put2(byte(op), 0)
4949                                 } else if yt.zcase == Zloop {
4950                                         ctxt.Diag("loop too far: %v", p)
4951                                 } else {
4952                                         if yt.zcase == Zbr {
4953                                                 ab.Put1(0x0f)
4954                                         }
4955                                         ab.Put1(o.op[z+1])
4956                                         ab.PutInt32(0)
4957                                 }
4958
4959                         case Zbyte:
4960                                 v = vaddr(ctxt, p, &p.From, &rel)
4961                                 if rel.Siz != 0 {
4962                                         rel.Siz = uint8(op)
4963                                         r = obj.Addrel(cursym)
4964                                         *r = rel
4965                                         r.Off = int32(p.Pc + int64(ab.Len()))
4966                                 }
4967
4968                                 ab.Put1(byte(v))
4969                                 if op > 1 {
4970                                         ab.Put1(byte(v >> 8))
4971                                         if op > 2 {
4972                                                 ab.PutInt16(int16(v >> 16))
4973                                                 if op > 4 {
4974                                                         ab.PutInt32(int32(v >> 32))
4975                                                 }
4976                                         }
4977                                 }
4978                         }
4979
4980                         return
4981                 }
4982         }
4983         f3t = Ynone * Ymax
4984         if p.GetFrom3() != nil {
4985                 f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
4986         }
4987         for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
4988                 var pp obj.Prog
4989                 var t []byte
4990                 if p.As == mo[0].as {
4991                         if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
4992                                 t = mo[0].op[:]
4993                                 switch mo[0].code {
4994                                 default:
4995                                         ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
4996
4997                                 case movLit:
4998                                         for z = 0; t[z] != 0; z++ {
4999                                                 ab.Put1(t[z])
5000                                         }
5001
5002                                 case movRegMem:
5003                                         ab.Put1(t[0])
5004                                         ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))
5005
5006                                 case movMemReg:
5007                                         ab.Put1(t[0])
5008                                         ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))
5009
5010                                 case movRegMem2op: // r,m - 2op
5011                                         ab.Put2(t[0], t[1])
5012                                         ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
5013                                         ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
5014
5015                                 case movMemReg2op:
5016                                         ab.Put2(t[0], t[1])
5017                                         ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
5018                                         ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
5019
5020                                 case movFullPtr:
5021                                         if t[0] != 0 {
5022                                                 ab.Put1(t[0])
5023                                         }
5024                                         switch p.To.Index {
5025                                         default:
5026                                                 goto bad
5027
5028                                         case REG_DS:
5029                                                 ab.Put1(0xc5)
5030
5031                                         case REG_SS:
5032                                                 ab.Put2(0x0f, 0xb2)
5033
5034                                         case REG_ES:
5035                                                 ab.Put1(0xc4)
5036
5037                                         case REG_FS:
5038                                                 ab.Put2(0x0f, 0xb4)
5039
5040                                         case REG_GS:
5041                                                 ab.Put2(0x0f, 0xb5)
5042                                         }
5043
5044                                         ab.asmand(ctxt, cursym, p, &p.From, &p.To)
5045
5046                                 case movDoubleShift:
5047                                         if t[0] == Pw {
5048                                                 if ctxt.Arch.Family != sys.AMD64 {
5049                                                         ctxt.Diag("asmins: illegal 64: %v", p)
5050                                                 }
5051                                                 ab.rexflag |= Pw
5052                                                 t = t[1:]
5053                                         } else if t[0] == Pe {
5054                                                 ab.Put1(Pe)
5055                                                 t = t[1:]
5056                                         }
5057
5058                                         switch p.From.Type {
5059                                         default:
5060                                                 goto bad
5061
5062                                         case obj.TYPE_CONST:
5063                                                 ab.Put2(0x0f, t[0])
5064                                                 ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
5065                                                 ab.Put1(byte(p.From.Offset))
5066
5067                                         case obj.TYPE_REG:
5068                                                 switch p.From.Reg {
5069                                                 default:
5070                                                         goto bad
5071
5072                                                 case REG_CL, REG_CX:
5073                                                         ab.Put2(0x0f, t[1])
5074                                                         ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
5075                                                 }
5076                                         }
5077
5078                                 // NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
5079                                 // where you load the TLS base register into a register and then index off that
5080                                 // register to access the actual TLS variables. Systems that allow direct TLS access
5081                                 // are handled in prefixof above and should not be listed here.
5082                                 case movTLSReg:
5083                                         if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
5084                                                 ctxt.Diag("invalid load of TLS: %v", p)
5085                                         }
5086
5087                                         if ctxt.Arch.Family == sys.I386 {
5088                                                 // NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
5089                                                 // where you load the TLS base register into a register and then index off that
5090                                                 // register to access the actual TLS variables. Systems that allow direct TLS access
5091                                                 // are handled in prefixof above and should not be listed here.
5092                                                 switch ctxt.Headtype {
5093                                                 default:
5094                                                         log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
5095
5096                                                 case objabi.Hlinux, objabi.Hfreebsd:
5097                                                         if ctxt.Flag_shared {
5098                                                                 // Note that this is not generating the same insns as the other cases.
5099                                                                 //     MOV TLS, dst
5100                                                                 // becomes
5101                                                                 //     call __x86.get_pc_thunk.dst
5102                                                                 //     movl (gotpc + g@gotntpoff)(dst), dst
5103                                                                 // which is encoded as
5104                                                                 //     call __x86.get_pc_thunk.dst
5105                                                                 //     movl 0(dst), dst
5106                                                                 // and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
5107                                                                 // is g, which we can't check here, but will when we assemble the second
5108                                                                 // instruction.
5109                                                                 dst := p.To.Reg
5110                                                                 ab.Put1(0xe8)
5111                                                                 r = obj.Addrel(cursym)
5112                                                                 r.Off = int32(p.Pc + int64(ab.Len()))
5113                                                                 r.Type = objabi.R_CALL
5114                                                                 r.Siz = 4
5115                                                                 r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
5116                                                                 ab.PutInt32(0)
5117
5118                                                                 ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
5119                                                                 r = obj.Addrel(cursym)
5120                                                                 r.Off = int32(p.Pc + int64(ab.Len()))
5121                                                                 r.Type = objabi.R_TLS_IE
5122                                                                 r.Siz = 4
5123                                                                 r.Add = 2
5124                                                                 ab.PutInt32(0)
5125                                                         } else {
5126                                                                 // ELF TLS base is 0(GS).
5127                                                                 pp.From = p.From
5128
5129                                                                 pp.From.Type = obj.TYPE_MEM
5130                                                                 pp.From.Reg = REG_GS
5131                                                                 pp.From.Offset = 0
5132                                                                 pp.From.Index = REG_NONE
5133                                                                 pp.From.Scale = 0
5134                                                                 ab.Put2(0x65, // GS
5135                                                                         0x8B)
5136                                                                 ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
5137                                                         }
5138                                                 case objabi.Hplan9:
5139                                                         pp.From = obj.Addr{}
5140                                                         pp.From.Type = obj.TYPE_MEM
5141                                                         pp.From.Name = obj.NAME_EXTERN
5142                                                         pp.From.Sym = plan9privates
5143                                                         pp.From.Offset = 0
5144                                                         pp.From.Index = REG_NONE
5145                                                         ab.Put1(0x8B)
5146                                                         ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
5147                                                 }
5148                                                 break
5149                                         }
5150
5151                                         switch ctxt.Headtype {
5152                                         default:
5153                                                 log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
5154
5155                                         case objabi.Hlinux, objabi.Hfreebsd:
5156                                                 if !ctxt.Flag_shared {
5157                                                         log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
5158                                                 }
5159                                                 // Note that this is not generating the same insn as the other cases.
5160                                                 //     MOV TLS, R_to
5161                                                 // becomes
5162                                                 //     movq g@gottpoff(%rip), R_to
5163                                                 // which is encoded as
5164                                                 //     movq 0(%rip), R_to
5165                                                 // and a R_TLS_IE reloc. This all assumes the only tls variable we access
5166                                                 // is g, which we can't check here, but will when we assemble the second
5167                                                 // instruction.
5168                                                 ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
5169
5170                                                 ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
5171                                                 r = obj.Addrel(cursym)
5172                                                 r.Off = int32(p.Pc + int64(ab.Len()))
5173                                                 r.Type = objabi.R_TLS_IE
5174                                                 r.Siz = 4
5175                                                 r.Add = -4
5176                                                 ab.PutInt32(0)
5177
5178                                         case objabi.Hplan9:
5179                                                 pp.From = obj.Addr{}
5180                                                 pp.From.Type = obj.TYPE_MEM
5181                                                 pp.From.Name = obj.NAME_EXTERN
5182                                                 pp.From.Sym = plan9privates
5183                                                 pp.From.Offset = 0
5184                                                 pp.From.Index = REG_NONE
5185                                                 ab.rexflag |= Pw
5186                                                 ab.Put1(0x8B)
5187                                                 ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
5188
5189                                         case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
5190                                                 // TLS base is 0(FS).
5191                                                 pp.From = p.From
5192
5193                                                 pp.From.Type = obj.TYPE_MEM
5194                                                 pp.From.Name = obj.NAME_NONE
5195                                                 pp.From.Reg = REG_NONE
5196                                                 pp.From.Offset = 0
5197                                                 pp.From.Index = REG_NONE
5198                                                 pp.From.Scale = 0
5199                                                 ab.rexflag |= Pw
5200                                                 ab.Put2(0x64, // FS
5201                                                         0x8B)
5202                                                 ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
5203                                         }
5204                                 }
5205                                 return
5206                         }
5207                 }
5208         }
5209         goto bad
5210
5211 bad:
5212         if ctxt.Arch.Family != sys.AMD64 {
5213                 // here, the assembly has failed.
5214                 // if it's a byte instruction that has
5215                 // unaddressable registers, try to
5216                 // exchange registers and reissue the
5217                 // instruction with the operands renamed.
5218                 pp := *p
5219
5220                 unbytereg(&pp.From, &pp.Ft)
5221                 unbytereg(&pp.To, &pp.Tt)
5222
5223                 z := int(p.From.Reg)
5224                 if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
5225                         // TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
5226                         // For now, different to keep bit-for-bit compatibility.
5227                         if ctxt.Arch.Family == sys.I386 {
5228                                 breg := byteswapreg(ctxt, &p.To)
5229                                 if breg != REG_AX {
5230                                         ab.Put1(0x87) // xchg lhs,bx
5231                                         ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
5232                                         subreg(&pp, z, breg)
5233                                         ab.doasm(ctxt, cursym, &pp)
5234                                         ab.Put1(0x87) // xchg lhs,bx
5235                                         ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
5236                                 } else {
5237                                         ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
5238                                         subreg(&pp, z, REG_AX)
5239                                         ab.doasm(ctxt, cursym, &pp)
5240                                         ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
5241                                 }
5242                                 return
5243                         }
5244
5245                         if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
5246                                 // We certainly don't want to exchange
5247                                 // with AX if the op is MUL or DIV.
5248                                 ab.Put1(0x87) // xchg lhs,bx
5249                                 ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
5250                                 subreg(&pp, z, REG_BX)
5251                                 ab.doasm(ctxt, cursym, &pp)
5252                                 ab.Put1(0x87) // xchg lhs,bx
5253                                 ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
5254                         } else {
5255                                 ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
5256                                 subreg(&pp, z, REG_AX)
5257                                 ab.doasm(ctxt, cursym, &pp)
5258                                 ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
5259                         }
5260                         return
5261                 }
5262
5263                 z = int(p.To.Reg)
5264                 if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
5265                         // TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
5266                         // For now, different to keep bit-for-bit compatibility.
5267                         if ctxt.Arch.Family == sys.I386 {
5268                                 breg := byteswapreg(ctxt, &p.From)
5269                                 if breg != REG_AX {
5270                                         ab.Put1(0x87) //xchg rhs,bx
5271                                         ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
5272                                         subreg(&pp, z, breg)
5273                                         ab.doasm(ctxt, cursym, &pp)
5274                                         ab.Put1(0x87) // xchg rhs,bx
5275                                         ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
5276                                 } else {
5277                                         ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
5278                                         subreg(&pp, z, REG_AX)
5279                                         ab.doasm(ctxt, cursym, &pp)
5280                                         ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
5281                                 }
5282                                 return
5283                         }
5284
5285                         if isax(&p.From) {
5286                                 ab.Put1(0x87) // xchg rhs,bx
5287                                 ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
5288                                 subreg(&pp, z, REG_BX)
5289                                 ab.doasm(ctxt, cursym, &pp)
5290                                 ab.Put1(0x87) // xchg rhs,bx
5291                                 ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
5292                         } else {
5293                                 ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
5294                                 subreg(&pp, z, REG_AX)
5295                                 ab.doasm(ctxt, cursym, &pp)
5296                                 ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
5297                         }
5298                         return
5299                 }
5300         }
5301
5302         ctxt.Diag("%s: invalid instruction: %v", cursym.Name, p)
5303 }
5304
5305 // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
5306 // which is not referenced in a.
5307 // If a is empty, it returns BX to account for MULB-like instructions
5308 // that might use DX and AX.
5309 func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
5310         cana, canb, canc, cand := true, true, true, true
5311         if a.Type == obj.TYPE_NONE {
5312                 cana, cand = false, false
5313         }
5314
5315         if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
5316                 switch a.Reg {
5317                 case REG_NONE:
5318                         cana, cand = false, false
5319                 case REG_AX, REG_AL, REG_AH:
5320                         cana = false
5321                 case REG_BX, REG_BL, REG_BH:
5322                         canb = false
5323                 case REG_CX, REG_CL, REG_CH:
5324                         canc = false
5325                 case REG_DX, REG_DL, REG_DH:
5326                         cand = false
5327                 }
5328         }
5329
5330         if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
5331                 switch a.Index {
5332                 case REG_AX:
5333                         cana = false
5334                 case REG_BX:
5335                         canb = false
5336                 case REG_CX:
5337                         canc = false
5338                 case REG_DX:
5339                         cand = false
5340                 }
5341         }
5342
5343         switch {
5344         case cana:
5345                 return REG_AX
5346         case canb:
5347                 return REG_BX
5348         case canc:
5349                 return REG_CX
5350         case cand:
5351                 return REG_DX
5352         default:
5353                 ctxt.Diag("impossible byte register")
5354                 ctxt.DiagFlush()
5355                 log.Fatalf("bad code")
5356                 return 0
5357         }
5358 }
5359
5360 func isbadbyte(a *obj.Addr) bool {
5361         return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
5362 }
5363
5364 func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
5365         ab.Reset()
5366
5367         ab.rexflag = 0
5368         ab.vexflag = false
5369         ab.evexflag = false
5370         mark := ab.Len()
5371         ab.doasm(ctxt, cursym, p)
5372         if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
5373                 // as befits the whole approach of the architecture,
5374                 // the rex prefix must appear before the first opcode byte
5375                 // (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
5376                 // before the 0f opcode escape!), or it might be ignored.
5377                 // note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
5378                 if ctxt.Arch.Family != sys.AMD64 {
5379                         ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
5380                 }
5381                 n := ab.Len()
5382                 var np int
5383                 for np = mark; np < n; np++ {
5384                         c := ab.At(np)
5385                         if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
5386                                 break
5387                         }
5388                 }
5389                 ab.Insert(np, byte(0x40|ab.rexflag))
5390         }
5391
5392         n := ab.Len()
5393         for i := len(cursym.R) - 1; i >= 0; i-- {
5394                 r := &cursym.R[i]
5395                 if int64(r.Off) < p.Pc {
5396                         break
5397                 }
5398                 if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
5399                         r.Off++
5400                 }
5401                 if r.Type == objabi.R_PCREL {
5402                         if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
5403                                 // PC-relative addressing is relative to the end of the instruction,
5404                                 // but the relocations applied by the linker are relative to the end
5405                                 // of the relocation. Because immediate instruction
5406                                 // arguments can follow the PC-relative memory reference in the
5407                                 // instruction encoding, the two may not coincide. In this case,
5408                                 // adjust addend so that linker can keep relocating relative to the
5409                                 // end of the relocation.
5410                                 r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
5411                         } else if ctxt.Arch.Family == sys.I386 {
5412                                 // On 386 PC-relative addressing (for non-call/jmp instructions)
5413                                 // assumes that the previous instruction loaded the PC of the end
5414                                 // of that instruction into CX, so the adjustment is relative to
5415                                 // that.
5416                                 r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
5417                         }
5418                 }
5419                 if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
5420                         // On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
5421                         r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
5422                 }
5423
5424         }
5425 }
5426
5427 // unpackOps4 extracts 4 operands from p.
5428 func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) {
5429         return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.To
5430 }
5431
5432 // unpackOps5 extracts 5 operands from p.
5433 func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) {
5434         return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.RestArgs[2].Addr, &p.To
5435 }