]> Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/internal/obj/x86/asm6.go
internal/buildcfg: move build configuration out of cmd/internal/objabi
[gostls13.git] / src / cmd / internal / obj / x86 / asm6.go
1 // Inferno utils/6l/span.c
2 // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c
3 //
4 //      Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
5 //      Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
6 //      Portions Copyright © 1997-1999 Vita Nuova Limited
7 //      Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
8 //      Portions Copyright © 2004,2006 Bruce Ellis
9 //      Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
10 //      Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
11 //      Portions Copyright © 2009 The Go Authors. All rights reserved.
12 //
13 // Permission is hereby granted, free of charge, to any person obtaining a copy
14 // of this software and associated documentation files (the "Software"), to deal
15 // in the Software without restriction, including without limitation the rights
16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 // copies of the Software, and to permit persons to whom the Software is
18 // furnished to do so, subject to the following conditions:
19 //
20 // The above copyright notice and this permission notice shall be included in
21 // all copies or substantial portions of the Software.
22 //
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29 // THE SOFTWARE.
30
31 package x86
32
33 import (
34         "cmd/internal/obj"
35         "cmd/internal/objabi"
36         "cmd/internal/sys"
37         "encoding/binary"
38         "fmt"
39         "internal/buildcfg"
40         "log"
41         "strings"
42 )
43
44 var (
45         plan9privates *obj.LSym
46         deferreturn   *obj.LSym
47 )
48
49 // Instruction layout.
50
51 // Loop alignment constants:
52 // want to align loop entry to loopAlign-byte boundary,
53 // and willing to insert at most maxLoopPad bytes of NOP to do so.
54 // We define a loop entry as the target of a backward jump.
55 //
56 // gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
57 // and it aligns all jump targets, not just backward jump targets.
58 //
59 // As of 6/1/2012, the effect of setting maxLoopPad = 10 here
60 // is very slight but negative, so the alignment is disabled by
61 // setting maxLoopPad = 0. The code is here for reference and
62 // for future experiments.
63 //
64 const (
65         loopAlign  = 16
66         maxLoopPad = 0
67 )
68
69 // Bit flags that are used to express jump target properties.
70 const (
71         // branchBackwards marks targets that are located behind.
72         // Used to express jumps to loop headers.
73         branchBackwards = (1 << iota)
74         // branchShort marks branches whose target is close,
75         // with an offset in the -128..127 range.
76         branchShort
77         // branchLoopHead marks loop entry.
78         // Used to insert padding for misaligned loops.
79         branchLoopHead
80 )
81
82 // opBytes holds optab encoding bytes.
83 // Each ytab reserves a fixed number of bytes in this array.
84 //
85 // The size should be the minimal number of bytes that
86 // is enough to hold the biggest optab op lines.
87 type opBytes [31]uint8
88
// Optab is one row of the optab instruction table. It ties an instruction
// to the operand forms it accepts and to the bytes used to encode each form.
89 type Optab struct {
90         as     obj.As // instruction this row describes; doasm finds the row by p.As
91         ytab   []ytab // accepted operand forms, scanned in order for a matching line
92         prefix uint8 // one of the P* constants; controls the prefix bytes to emit
93         op     opBytes // encoding bytes; the z pointer walks them as ytab lines are skipped
94 }
95
// movtab is a table row keyed by instruction (as) that carries three uint8
// operand-type slots, a code byte and up to four op bytes.
// NOTE(review): nothing in the visible part of the file constructs or reads
// movtab values; presumably ft, f3t and tt are the Y-classes of the From,
// From3 and To operands — confirm against the code that uses this type.
96 type movtab struct {
97         as   obj.As
98         ft   uint8
99         f3t  uint8
100         tt   uint8
101         code uint8
102         op   [4]uint8
103 }
104
// Operand classes ("Ytypes").
// oclass computes the most specific class for an operand; the ycover table,
// set up in instinit, records which more general classes that specific class
// also satisfies (for example Yi0, Yi1 and Yi8 all count as Ys32).
// The values are assigned by iota, so the order of this list must not change
// casually. Ymax is the number of classes and sizes the ycover matrix.
105 const (
106         Yxxx = iota
107         Ynone
108         Yi0 // $0
109         Yi1 // $1
110         Yu2 // $x, x fits in uint2
111         Yi8 // $x, x fits in int8
112         Yu8 // $x, x fits in uint8
113         Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
114         Ys32 // signed 32
115         Yi32
116         Yi64
117         Yiauto
118         Yal
119         Ycl
120         Yax
121         Ycx
122         Yrb
123         Yrl
124         Yrl32 // Yrl on 32-bit system
125         Yrf
126         Yf0
127         Yrx
128         Ymb
129         Yml
130         Ym
131         Ybr
132         Ycs
133         Yss
134         Yds
135         Yes
136         Yfs
137         Ygs
138         Ygdtr
139         Yidtr
140         Yldtr
141         Ymsw
142         Ytask
143         Ycr0
144         Ycr1
145         Ycr2
146         Ycr3
147         Ycr4
148         Ycr5
149         Ycr6
150         Ycr7
151         Ycr8
152         Ydr0
153         Ydr1
154         Ydr2
155         Ydr3
156         Ydr4
157         Ydr5
158         Ydr6
159         Ydr7
160         Ytr0
161         Ytr1
162         Ytr2
163         Ytr3
164         Ytr4
165         Ytr5
166         Ytr6
167         Ytr7
168         Ymr
169         Ymm
170         Yxr0          // X0 only. "<XMM0>" notation in Intel manual.
171         YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
172         Yxr           // X0..X15
173         YxrEvex       // X0..X31
174         Yxm
175         YxmEvex       // YxrEvex+Ym
176         Yxvm          // VSIB vector array; vm32x/vm64x
177         YxvmEvex      // Yxvm which permits High-16 X register as index.
178         YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
179         Yyr           // Y0..Y15
180         YyrEvex       // Y0..Y31
181         Yym
182         YymEvex   // YyrEvex+Ym
183         Yyvm      // VSIB vector array; vm32y/vm64y
184         YyvmEvex  // Yyvm which permits High-16 Y register as index.
185         YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex
186         Yzr       // Z0..Z31
187         Yzm       // Yzr+Ym
188         Yzvm      // VSIB vector array; vm32z/vm64z
189         Yk0       // K0
190         Yknot0    // K1..K7; write mask
191         Yk        // K0..K7; used for KOP
192         Ykm       // Yk+Ym; used for KOP
193         Ytls
194         Ytextsize
195         Yindir
196         Ymax // number of operand classes; dimension of ycover
197 )
198
// Encoding forms ("Ztypes").
// Every ytab line names one of these as its first entry, and the switch on
// yt.zcase in doasm uses it to decide how the op bytes, operands and
// immediates are laid down. For example, Zibo_m is an opcode byte, then an
// asmando for the memory/register operand, then a single immediate byte;
// Zilo_m is the same but with a 32-bit immediate.
// The values are assigned by iota, so the order of this list is significant.
199 const (
200         Zxxx = iota
201         Zlit
202         Zlitm_r
203         Zlitr_m
204         Zlit_m_r
205         Z_rp
206         Zbr
207         Zcall
208         Zcallcon
209         Zcallduff
210         Zcallind
211         Zcallindreg
212         Zib_
213         Zib_rp
214         Zibo_m
215         Zibo_m_xm
216         Zil_
217         Zil_rp
218         Ziq_rp
219         Zilo_m
220         Zjmp
221         Zjmpcon
222         Zloop
223         Zo_iw
224         Zm_o
225         Zm_r
226         Z_m_r
227         Zm2_r
228         Zm_r_xm
229         Zm_r_i_xm
230         Zm_r_xm_nr
231         Zr_m_xm_nr
232         Zibm_r // mmx1,mmx2/mem64,imm8
233         Zibr_m
234         Zmb_r
235         Zaut_r
236         Zo_m
237         Zo_m64
238         Zpseudo
239         Zr_m
240         Zr_m_xm
241         Zrp_
242         Z_ib
243         Z_il
244         Zm_ibo
245         Zm_ilo
246         Zib_rr
247         Zil_rr
248         Zbyte
249
250         Zvex_rm_v_r
251         Zvex_rm_v_ro
252         Zvex_r_v_rm
253         Zvex_i_rm_vo
254         Zvex_v_rm_r
255         Zvex_i_rm_r
256         Zvex_i_r_v
257         Zvex_i_rm_v_r
258         Zvex
259         Zvex_rm_r_vo
260         Zvex_i_r_rm
261         Zvex_hr_rm_v_r
262
        // NOTE(review): Zevex_first and Zevex_last look like range sentinels
        // that bracket the EVEX forms rather than real encodings — confirm.
263         Zevex_first
264         Zevex_i_r_k_rm
265         Zevex_i_r_rm
266         Zevex_i_rm_k_r
267         Zevex_i_rm_k_vo
268         Zevex_i_rm_r
269         Zevex_i_rm_v_k_r
270         Zevex_i_rm_v_r
271         Zevex_i_rm_vo
272         Zevex_k_rmo
273         Zevex_r_k_rm
274         Zevex_r_v_k_rm
275         Zevex_r_v_rm
276         Zevex_rm_k_r
277         Zevex_rm_v_k_r
278         Zevex_rm_v_r
279         Zevex_last
280
281         Zmax
282 )
283
284 const (
285         Px   = 0
286         Px1  = 1    // symbolic; exact value doesn't matter
287         P32  = 0x32 // 32-bit only
288         Pe   = 0x66 // operand escape
289         Pm   = 0x0f // 2byte opcode escape
290         Pq   = 0xff // both escapes: 66 0f
291         Pb   = 0xfe // byte operands
292         Pf2  = 0xf2 // xmm escape 1: f2 0f
293         Pf3  = 0xf3 // xmm escape 2: f3 0f
294         Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
295         Pq3  = 0x67 // xmm escape 3: 66 48 0f
296         Pq4  = 0x68 // xmm escape 4: 66 0F 38
297         Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38
298         Pq5  = 0x6a // xmm escape 5: F3 0F 38
299         Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38
300         Pfw  = 0xf4 // Pf3 with Rex.w: f3 48 0f
301         Pw   = 0x48 // Rex.w
302         Pw8  = 0x90 // symbolic; exact value doesn't matter
303         Py   = 0x80 // defaults to 64-bit mode
304         Py1  = 0x81 // symbolic; exact value doesn't matter
305         Py3  = 0x83 // symbolic; exact value doesn't matter
306         Pavx = 0x84 // symbolic: exact value doesn't matter
307
308         RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
309         Rxw     = 1 << 3 // =1, 64-bit operand size
310         Rxr     = 1 << 2 // extend modrm reg
311         Rxx     = 1 << 1 // extend sib index
312         Rxb     = 1 << 0 // extend modrm r/m, sib base, or opcode reg
313 )
314
315 const (
316         // Encoding for VEX prefix in tables.
317         // The P, L, and W fields are chosen to match
318         // their eventual locations in the VEX prefix bytes.
319
320         // As written below, P occupies bits 0-1, L bit 2, M bits 3-4,
321         // avxEscape bit 6 and W bit 7, so the fields do not overlap
322         // (presumably they are OR-ed into a single table byte — confirm).
323
324         // Using spare bit to make leading [E]VEX encoding byte different from
325         // 0x0f even if all other VEX fields are 0.
326         avxEscape = 1 << 6
327
328         // P field - 2 bits
329         vex66 = 1 << 0
330         vexF3 = 2 << 0
331         vexF2 = 3 << 0
332         // L field - 1 bit; vexLZ, vexLIG and vex128 are all the value 0
333         vexLZ  = 0 << 2
334         vexLIG = 0 << 2
335         vex128 = 0 << 2
336         vex256 = 1 << 2
337         // W field - 1 bit; vexWIG and vexW0 are both the value 0
338         vexWIG = 0 << 7
339         vexW0  = 0 << 7
340         vexW1  = 1 << 7
341         // M field - 5 bits, but mostly reserved; we can store up to 3
342         vex0F   = 1 << 3
343         vex0F38 = 2 << 3
344         vex0F3A = 3 << 3
345 )
346
// ycover is a Ymax-by-Ymax table stored flat and indexed as
// ycover[specific*Ymax+general]. An entry set to 1 means an operand whose
// specific class is "specific" also counts as class "general", e.g.
// ycover[Yi0*Ymax+Ys32] = 1. The table is set up in instinit.
347 var ycover [Ymax * Ymax]uint8
348
349 var reg [MAXREG]int
350
351 var regrex [MAXREG + 1]int
352
353 var ynone = []ytab{
354         {Zlit, 1, argList{}},
355 }
356
357 var ytext = []ytab{
358         {Zpseudo, 0, argList{Ymb, Ytextsize}},
359         {Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
360 }
361
// ynop lists operand forms that all use the Zpseudo encoding form: either no
// operand at all, or a single Yiauto, Yml, Yrf or Yxr operand.
// NOTE(review): the four single-operand lines appear twice, and the second
// Yxr line differs only in its zoffset (1 instead of 0). Presumably this is a
// leftover from an older ytab layout that distinguished the source and
// destination positions — confirm how lines are matched before deduplicating.
362 var ynop = []ytab{
363         {Zpseudo, 0, argList{}},
364         {Zpseudo, 0, argList{Yiauto}},
365         {Zpseudo, 0, argList{Yml}},
366         {Zpseudo, 0, argList{Yrf}},
367         {Zpseudo, 0, argList{Yxr}},
368         {Zpseudo, 0, argList{Yiauto}},
369         {Zpseudo, 0, argList{Yml}},
370         {Zpseudo, 0, argList{Yrf}},
371         {Zpseudo, 1, argList{Yxr}},
372 }
373
374 var yfuncdata = []ytab{
375         {Zpseudo, 0, argList{Yi32, Ym}},
376 }
377
378 var ypcdata = []ytab{
379         {Zpseudo, 0, argList{Yi32, Yi32}},
380 }
381
382 var yxorb = []ytab{
383         {Zib_, 1, argList{Yi32, Yal}},
384         {Zibo_m, 2, argList{Yi32, Ymb}},
385         {Zr_m, 1, argList{Yrb, Ymb}},
386         {Zm_r, 1, argList{Ymb, Yrb}},
387 }
388
// yaddl lists the operand forms shared by the ADC, ADD and AND instructions
// in their L, Q and W sizes (see the AADCL, AADDL and AANDL rows of optab and
// their Q/W siblings).
// Each line is {Ztype, zoffset, operand classes}. The zoffsets (2, 1, 2, 1, 1)
// must stay in step with the opBytes of every row that uses this table; for
// AADDL those bytes are {0x83, 00 | 0x05 | 0x81, 00 | 0x01 | 0x03}.
389 var yaddl = []ytab{
390         {Zibo_m, 2, argList{Yi8, Yml}}, // imm8 form, listed before the imm32 forms
391         {Zil_, 1, argList{Yi32, Yax}},
392         {Zilo_m, 2, argList{Yi32, Yml}},
393         {Zr_m, 1, argList{Yrl, Yml}},
394         {Zm_r, 1, argList{Yml, Yrl}},
395 }
396
397 var yincl = []ytab{
398         {Z_rp, 1, argList{Yrl}},
399         {Zo_m, 2, argList{Yml}},
400 }
401
402 var yincq = []ytab{
403         {Zo_m, 2, argList{Yml}},
404 }
405
406 var ycmpb = []ytab{
407         {Z_ib, 1, argList{Yal, Yi32}},
408         {Zm_ibo, 2, argList{Ymb, Yi32}},
409         {Zm_r, 1, argList{Ymb, Yrb}},
410         {Zr_m, 1, argList{Yrb, Ymb}},
411 }
412
413 var ycmpl = []ytab{
414         {Zm_ibo, 2, argList{Yml, Yi8}},
415         {Z_il, 1, argList{Yax, Yi32}},
416         {Zm_ilo, 2, argList{Yml, Yi32}},
417         {Zm_r, 1, argList{Yml, Yrl}},
418         {Zr_m, 1, argList{Yrl, Yml}},
419 }
420
421 var yshb = []ytab{
422         {Zo_m, 2, argList{Yi1, Ymb}},
423         {Zibo_m, 2, argList{Yu8, Ymb}},
424         {Zo_m, 2, argList{Ycx, Ymb}},
425 }
426
427 var yshl = []ytab{
428         {Zo_m, 2, argList{Yi1, Yml}},
429         {Zibo_m, 2, argList{Yu8, Yml}},
430         {Zo_m, 2, argList{Ycl, Yml}},
431         {Zo_m, 2, argList{Ycx, Yml}},
432 }
433
434 var ytestl = []ytab{
435         {Zil_, 1, argList{Yi32, Yax}},
436         {Zilo_m, 2, argList{Yi32, Yml}},
437         {Zr_m, 1, argList{Yrl, Yml}},
438         {Zm_r, 1, argList{Yml, Yrl}},
439 }
440
441 var ymovb = []ytab{
442         {Zr_m, 1, argList{Yrb, Ymb}},
443         {Zm_r, 1, argList{Ymb, Yrb}},
444         {Zib_rp, 1, argList{Yi32, Yrb}},
445         {Zibo_m, 2, argList{Yi32, Ymb}},
446 }
447
448 var ybtl = []ytab{
449         {Zibo_m, 2, argList{Yi8, Yml}},
450         {Zr_m, 1, argList{Yrl, Yml}},
451 }
452
453 var ymovw = []ytab{
454         {Zr_m, 1, argList{Yrl, Yml}},
455         {Zm_r, 1, argList{Yml, Yrl}},
456         {Zil_rp, 1, argList{Yi32, Yrl}},
457         {Zilo_m, 2, argList{Yi32, Yml}},
458         {Zaut_r, 2, argList{Yiauto, Yrl}},
459 }
460
461 var ymovl = []ytab{
462         {Zr_m, 1, argList{Yrl, Yml}},
463         {Zm_r, 1, argList{Yml, Yrl}},
464         {Zil_rp, 1, argList{Yi32, Yrl}},
465         {Zilo_m, 2, argList{Yi32, Yml}},
466         {Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
467         {Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
468         {Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
469         {Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
470         {Zaut_r, 2, argList{Yiauto, Yrl}},
471 }
472
473 var yret = []ytab{
474         {Zo_iw, 1, argList{}},
475         {Zo_iw, 1, argList{Yi32}},
476 }
477
478 var ymovq = []ytab{
479         // valid in 32-bit mode
480         {Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
481         {Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
482         {Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
483         {Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
484         {Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
485
486         // valid only in 64-bit mode, usually with 64-bit prefix
487         {Zr_m, 1, argList{Yrl, Yml}},      // 0x89
488         {Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
489         {Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
490         {Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
491         {Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
492         {Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
493         {Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
494         {Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
495         {Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
496         {Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
497 }
498
499 var ymovbe = []ytab{
500         {Zlitm_r, 3, argList{Ym, Yrl}},
501         {Zlitr_m, 3, argList{Yrl, Ym}},
502 }
503
504 var ym_rl = []ytab{
505         {Zm_r, 1, argList{Ym, Yrl}},
506 }
507
508 var yrl_m = []ytab{
509         {Zr_m, 1, argList{Yrl, Ym}},
510 }
511
512 var ymb_rl = []ytab{
513         {Zmb_r, 1, argList{Ymb, Yrl}},
514 }
515
516 var yml_rl = []ytab{
517         {Zm_r, 1, argList{Yml, Yrl}},
518 }
519
520 var yrl_ml = []ytab{
521         {Zr_m, 1, argList{Yrl, Yml}},
522 }
523
524 var yml_mb = []ytab{
525         {Zr_m, 1, argList{Yrb, Ymb}},
526         {Zm_r, 1, argList{Ymb, Yrb}},
527 }
528
529 var yrb_mb = []ytab{
530         {Zr_m, 1, argList{Yrb, Ymb}},
531 }
532
533 var yxchg = []ytab{
534         {Z_rp, 1, argList{Yax, Yrl}},
535         {Zrp_, 1, argList{Yrl, Yax}},
536         {Zr_m, 1, argList{Yrl, Yml}},
537         {Zm_r, 1, argList{Yml, Yrl}},
538 }
539
540 var ydivl = []ytab{
541         {Zm_o, 2, argList{Yml}},
542 }
543
544 var ydivb = []ytab{
545         {Zm_o, 2, argList{Ymb}},
546 }
547
548 var yimul = []ytab{
549         {Zm_o, 2, argList{Yml}},
550         {Zib_rr, 1, argList{Yi8, Yrl}},
551         {Zil_rr, 1, argList{Yi32, Yrl}},
552         {Zm_r, 2, argList{Yml, Yrl}},
553 }
554
555 var yimul3 = []ytab{
556         {Zibm_r, 2, argList{Yi8, Yml, Yrl}},
557         {Zibm_r, 2, argList{Yi32, Yml, Yrl}},
558 }
559
560 var ybyte = []ytab{
561         {Zbyte, 1, argList{Yi64}},
562 }
563
564 var yin = []ytab{
565         {Zib_, 1, argList{Yi32}},
566         {Zlit, 1, argList{}},
567 }
568
569 var yint = []ytab{
570         {Zib_, 1, argList{Yi32}},
571 }
572
573 var ypushl = []ytab{
574         {Zrp_, 1, argList{Yrl}},
575         {Zm_o, 2, argList{Ym}},
576         {Zib_, 1, argList{Yi8}},
577         {Zil_, 1, argList{Yi32}},
578 }
579
580 var ypopl = []ytab{
581         {Z_rp, 1, argList{Yrl}},
582         {Zo_m, 2, argList{Ym}},
583 }
584
585 var ywrfsbase = []ytab{
586         {Zm_o, 2, argList{Yrl}},
587 }
588
589 var yrdrand = []ytab{
590         {Zo_m, 2, argList{Yrl}},
591 }
592
593 var yclflush = []ytab{
594         {Zo_m, 2, argList{Ym}},
595 }
596
597 var ybswap = []ytab{
598         {Z_rp, 2, argList{Yrl}},
599 }
600
601 var yscond = []ytab{
602         {Zo_m, 2, argList{Ymb}},
603 }
604
605 var yjcond = []ytab{
606         {Zbr, 0, argList{Ybr}},
607         {Zbr, 0, argList{Yi0, Ybr}},
608         {Zbr, 1, argList{Yi1, Ybr}},
609 }
610
611 var yloop = []ytab{
612         {Zloop, 1, argList{Ybr}},
613 }
614
615 var ycall = []ytab{
616         {Zcallindreg, 0, argList{Yml}},
617         {Zcallindreg, 2, argList{Yrx, Yrx}},
618         {Zcallind, 2, argList{Yindir}},
619         {Zcall, 0, argList{Ybr}},
620         {Zcallcon, 1, argList{Yi32}},
621 }
622
623 var yduff = []ytab{
624         {Zcallduff, 1, argList{Yi32}},
625 }
626
627 var yjmp = []ytab{
628         {Zo_m64, 2, argList{Yml}},
629         {Zjmp, 0, argList{Ybr}},
630         {Zjmpcon, 1, argList{Yi32}},
631 }
632
633 var yfmvd = []ytab{
634         {Zm_o, 2, argList{Ym, Yf0}},
635         {Zo_m, 2, argList{Yf0, Ym}},
636         {Zm_o, 2, argList{Yrf, Yf0}},
637         {Zo_m, 2, argList{Yf0, Yrf}},
638 }
639
640 var yfmvdp = []ytab{
641         {Zo_m, 2, argList{Yf0, Ym}},
642         {Zo_m, 2, argList{Yf0, Yrf}},
643 }
644
645 var yfmvf = []ytab{
646         {Zm_o, 2, argList{Ym, Yf0}},
647         {Zo_m, 2, argList{Yf0, Ym}},
648 }
649
650 var yfmvx = []ytab{
651         {Zm_o, 2, argList{Ym, Yf0}},
652 }
653
654 var yfmvp = []ytab{
655         {Zo_m, 2, argList{Yf0, Ym}},
656 }
657
658 var yfcmv = []ytab{
659         {Zm_o, 2, argList{Yrf, Yf0}},
660 }
661
662 var yfadd = []ytab{
663         {Zm_o, 2, argList{Ym, Yf0}},
664         {Zm_o, 2, argList{Yrf, Yf0}},
665         {Zo_m, 2, argList{Yf0, Yrf}},
666 }
667
668 var yfxch = []ytab{
669         {Zo_m, 2, argList{Yf0, Yrf}},
670         {Zm_o, 2, argList{Yrf, Yf0}},
671 }
672
673 var ycompp = []ytab{
674         {Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
675 }
676
677 var ystsw = []ytab{
678         {Zo_m, 2, argList{Ym}},
679         {Zlit, 1, argList{Yax}},
680 }
681
682 var ysvrs_mo = []ytab{
683         {Zm_o, 2, argList{Ym}},
684 }
685
686 // unaryDst version of "ysvrs_mo".
687 var ysvrs_om = []ytab{
688         {Zo_m, 2, argList{Ym}},
689 }
690
691 var ymm = []ytab{
692         {Zm_r_xm, 1, argList{Ymm, Ymr}},
693         {Zm_r_xm, 2, argList{Yxm, Yxr}},
694 }
695
696 var yxm = []ytab{
697         {Zm_r_xm, 1, argList{Yxm, Yxr}},
698 }
699
700 var yxm_q4 = []ytab{
701         {Zm_r, 1, argList{Yxm, Yxr}},
702 }
703
704 var yxcvm1 = []ytab{
705         {Zm_r_xm, 2, argList{Yxm, Yxr}},
706         {Zm_r_xm, 2, argList{Yxm, Ymr}},
707 }
708
709 var yxcvm2 = []ytab{
710         {Zm_r_xm, 2, argList{Yxm, Yxr}},
711         {Zm_r_xm, 2, argList{Ymm, Yxr}},
712 }
713
714 var yxr = []ytab{
715         {Zm_r_xm, 1, argList{Yxr, Yxr}},
716 }
717
718 var yxr_ml = []ytab{
719         {Zr_m_xm, 1, argList{Yxr, Yml}},
720 }
721
722 var ymr = []ytab{
723         {Zm_r, 1, argList{Ymr, Ymr}},
724 }
725
726 var ymr_ml = []ytab{
727         {Zr_m_xm, 1, argList{Ymr, Yml}},
728 }
729
730 var yxcmpi = []ytab{
731         {Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
732 }
733
734 var yxmov = []ytab{
735         {Zm_r_xm, 1, argList{Yxm, Yxr}},
736         {Zr_m_xm, 1, argList{Yxr, Yxm}},
737 }
738
739 var yxcvfl = []ytab{
740         {Zm_r_xm, 1, argList{Yxm, Yrl}},
741 }
742
743 var yxcvlf = []ytab{
744         {Zm_r_xm, 1, argList{Yml, Yxr}},
745 }
746
747 var yxcvfq = []ytab{
748         {Zm_r_xm, 2, argList{Yxm, Yrl}},
749 }
750
751 var yxcvqf = []ytab{
752         {Zm_r_xm, 2, argList{Yml, Yxr}},
753 }
754
755 var yps = []ytab{
756         {Zm_r_xm, 1, argList{Ymm, Ymr}},
757         {Zibo_m_xm, 2, argList{Yi8, Ymr}},
758         {Zm_r_xm, 2, argList{Yxm, Yxr}},
759         {Zibo_m_xm, 3, argList{Yi8, Yxr}},
760 }
761
762 var yxrrl = []ytab{
763         {Zm_r, 1, argList{Yxr, Yrl}},
764 }
765
766 var ymrxr = []ytab{
767         {Zm_r, 1, argList{Ymr, Yxr}},
768         {Zm_r_xm, 1, argList{Yxm, Yxr}},
769 }
770
771 var ymshuf = []ytab{
772         {Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
773 }
774
775 var ymshufb = []ytab{
776         {Zm2_r, 2, argList{Yxm, Yxr}},
777 }
778
779 // It should never have more than 1 entry,
780 // because some optab entries have opcode sequences that
781 // are longer than 2 bytes (zoffset=2 here),
782 // ROUNDPD and ROUNDPS and recently added BLENDPD,
783 // to name a few.
784 var yxshuf = []ytab{
785         {Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
786 }
787
788 var yextrw = []ytab{
789         {Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
790         {Zibr_m, 2, argList{Yu8, Yxr, Yml}},
791 }
792
793 var yextr = []ytab{
794         {Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
795 }
796
797 var yinsrw = []ytab{
798         {Zibm_r, 2, argList{Yu8, Yml, Yxr}},
799 }
800
801 var yinsr = []ytab{
802         {Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
803 }
804
805 var ypsdq = []ytab{
806         {Zibo_m, 2, argList{Yi8, Yxr}},
807 }
808
809 var ymskb = []ytab{
810         {Zm_r_xm, 2, argList{Yxr, Yrl}},
811         {Zm_r_xm, 1, argList{Ymr, Yrl}},
812 }
813
814 var ycrc32l = []ytab{
815         {Zlitm_r, 0, argList{Yml, Yrl}},
816 }
817
818 var ycrc32b = []ytab{
819         {Zlitm_r, 0, argList{Ymb, Yrl}},
820 }
821
822 var yprefetch = []ytab{
823         {Zm_o, 2, argList{Ym}},
824 }
825
826 var yaes = []ytab{
827         {Zlitm_r, 2, argList{Yxm, Yxr}},
828 }
829
830 var yxbegin = []ytab{
831         {Zjmp, 1, argList{Ybr}},
832 }
833
834 var yxabort = []ytab{
835         {Zib_, 1, argList{Yu8}},
836 }
837
838 var ylddqu = []ytab{
839         {Zm_r, 1, argList{Ym, Yxr}},
840 }
841
842 var ypalignr = []ytab{
843         {Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
844 }
845
846 var ysha256rnds2 = []ytab{
847         {Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
848 }
849
850 var yblendvpd = []ytab{
851         {Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
852 }
853
854 var ymmxmm0f38 = []ytab{
855         {Zlitm_r, 3, argList{Ymm, Ymr}},
856         {Zlitm_r, 5, argList{Yxm, Yxr}},
857 }
858
859 var yextractps = []ytab{
860         {Zibr_m, 2, argList{Yu2, Yxr, Yml}},
861 }
862
863 var ysha1rnds4 = []ytab{
864         {Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
865 }
866
867 // You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
868 // ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
869 // to find the entry with the given p.As and then looks through the ytable for
870 // that instruction (the second field in the optab struct) for a line whose
871 // operand Ytypes match those of the p.From and p.To operands.  The
872 // function oclass computes the specific Ytype of an operand and then the set
873 // of more general Ytypes that it satisfies is implied by the ycover table, set
874 // up in instinit.  For example, oclass distinguishes the constants 0 and 1
875 // from the more general 8-bit constants, but instinit says
876 //
877 //        ycover[Yi0*Ymax+Ys32] = 1
878 //        ycover[Yi1*Ymax+Ys32] = 1
879 //        ycover[Yi8*Ymax+Ys32] = 1
880 //
881 // which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
882 // if that's what an instruction can handle.
883 //
884 // In parallel with the scan through the ytable for the appropriate line, there
885 // is a z pointer that starts out pointing at the strange magic byte list in
886 // the Optab struct.  With each step past a non-matching ytable line, z
887 // advances by that line's zoffset.  When a matching line is found, that
888 // z pointer has the extra data to use in laying down the instruction bytes.
889 // The actual bytes laid down are a function of the line's zcase (that
890 // is, the Ztype) and the z bytes.
891 //
892 // For example, let's look at AADDL.  The optab line says:
893 //        {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
894 //
895 // and yaddl says
896 //        var yaddl = []ytab{
897 //                {Zibo_m, 2, argList{Yi8, Yml}},
898 //                {Zil_, 1, argList{Yi32, Yax}},
899 //                {Zilo_m, 2, argList{Yi32, Yml}},
900 //                {Zr_m, 1, argList{Yrl, Yml}},
901 //                {Zm_r, 1, argList{Yml, Yrl}},
902 //        }
903 //
904 // so there are 5 possible types of ADDL instruction that can be laid down, and
905 // possible states used to lay them down (Ztype and z pointer, assuming z
906 // points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
907 //
908 //        Yi8, Yml -> Zibo_m, z (0x83, 00)
909 //        Yi32, Yax -> Zil_, z+2 (0x05)
910 //        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
911 //        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
912 //        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
913 //
914 // The Pconstant in the optab line controls the prefix bytes to emit.  That's
915 // relatively straightforward as this program goes.
916 //
917 // The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
918 // example, is an opcode byte (z[0]) then an asmando (which is some kind of
919 // encoded addressing mode for the Yml arg), and then a single immediate byte.
920 // Zilo_m is the same but a long (32-bit) immediate.
921 var optab =
922 //      as, ytab, andproto, opcode
923 [...]Optab{
924         {obj.AXXX, nil, 0, opBytes{}},
925         {AAAA, ynone, P32, opBytes{0x37}},
926         {AAAD, ynone, P32, opBytes{0xd5, 0x0a}},
927         {AAAM, ynone, P32, opBytes{0xd4, 0x0a}},
928         {AAAS, ynone, P32, opBytes{0x3f}},
929         {AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}},
930         {AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
931         {AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
932         {AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
933         {AADCXL, yml_rl, Pq4, opBytes{0xf6}},
934         {AADCXQ, yml_rl, Pq4w, opBytes{0xf6}},
935         {AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}},
936         {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
937         {AADDPD, yxm, Pq, opBytes{0x58}},
938         {AADDPS, yxm, Pm, opBytes{0x58}},
939         {AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
940         {AADDSD, yxm, Pf2, opBytes{0x58}},
941         {AADDSS, yxm, Pf3, opBytes{0x58}},
942         {AADDSUBPD, yxm, Pq, opBytes{0xd0}},
943         {AADDSUBPS, yxm, Pf2, opBytes{0xd0}},
944         {AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
945         {AADOXL, yml_rl, Pq5, opBytes{0xf6}},
946         {AADOXQ, yml_rl, Pq5w, opBytes{0xf6}},
947         {AADJSP, nil, 0, opBytes{}},
948         {AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}},
949         {AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
950         {AANDNPD, yxm, Pq, opBytes{0x55}},
951         {AANDNPS, yxm, Pm, opBytes{0x55}},
952         {AANDPD, yxm, Pq, opBytes{0x54}},
953         {AANDPS, yxm, Pm, opBytes{0x54}},
954         {AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
955         {AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
956         {AARPL, yrl_ml, P32, opBytes{0x63}},
957         {ABOUNDL, yrl_m, P32, opBytes{0x62}},
958         {ABOUNDW, yrl_m, Pe, opBytes{0x62}},
959         {ABSFL, yml_rl, Pm, opBytes{0xbc}},
960         {ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}},
961         {ABSFW, yml_rl, Pq, opBytes{0xbc}},
962         {ABSRL, yml_rl, Pm, opBytes{0xbd}},
963         {ABSRQ, yml_rl, Pw, opBytes{0x0f, 0xbd}},
964         {ABSRW, yml_rl, Pq, opBytes{0xbd}},
965         {ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}},
966         {ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}},
967         {ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}},
968         {ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}},
969         {ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}},
970         {ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}},
971         {ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}},
972         {ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}},
973         {ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}},
974         {ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}},
975         {ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}},
976         {ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}},
977         {ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}},
978         {ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}},
979         {ABYTE, ybyte, Px, opBytes{1}},
980         {obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}},
981         {ACBW, ynone, Pe, opBytes{0x98}},
982         {ACDQ, ynone, Px, opBytes{0x99}},
983         {ACDQE, ynone, Pw, opBytes{0x98}},
984         {ACLAC, ynone, Pm, opBytes{01, 0xca}},
985         {ACLC, ynone, Px, opBytes{0xf8}},
986         {ACLD, ynone, Px, opBytes{0xfc}},
987         {ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}},
988         {ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}},
989         {ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}},
990         {ACLI, ynone, Px, opBytes{0xfa}},
991         {ACLTS, ynone, Pm, opBytes{0x06}},
992         {ACLWB, yclflush, Pq, opBytes{0xae, 06}},
993         {ACMC, ynone, Px, opBytes{0xf5}},
994         {ACMOVLCC, yml_rl, Pm, opBytes{0x43}},
995         {ACMOVLCS, yml_rl, Pm, opBytes{0x42}},
996         {ACMOVLEQ, yml_rl, Pm, opBytes{0x44}},
997         {ACMOVLGE, yml_rl, Pm, opBytes{0x4d}},
998         {ACMOVLGT, yml_rl, Pm, opBytes{0x4f}},
999         {ACMOVLHI, yml_rl, Pm, opBytes{0x47}},
1000         {ACMOVLLE, yml_rl, Pm, opBytes{0x4e}},
1001         {ACMOVLLS, yml_rl, Pm, opBytes{0x46}},
1002         {ACMOVLLT, yml_rl, Pm, opBytes{0x4c}},
1003         {ACMOVLMI, yml_rl, Pm, opBytes{0x48}},
1004         {ACMOVLNE, yml_rl, Pm, opBytes{0x45}},
1005         {ACMOVLOC, yml_rl, Pm, opBytes{0x41}},
1006         {ACMOVLOS, yml_rl, Pm, opBytes{0x40}},
1007         {ACMOVLPC, yml_rl, Pm, opBytes{0x4b}},
1008         {ACMOVLPL, yml_rl, Pm, opBytes{0x49}},
1009         {ACMOVLPS, yml_rl, Pm, opBytes{0x4a}},
1010         {ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}},
1011         {ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}},
1012         {ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}},
1013         {ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}},
1014         {ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}},
1015         {ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}},
1016         {ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}},
1017         {ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}},
1018         {ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}},
1019         {ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}},
1020         {ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}},
1021         {ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}},
1022         {ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}},
1023         {ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}},
1024         {ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}},
1025         {ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}},
1026         {ACMOVWCC, yml_rl, Pq, opBytes{0x43}},
1027         {ACMOVWCS, yml_rl, Pq, opBytes{0x42}},
1028         {ACMOVWEQ, yml_rl, Pq, opBytes{0x44}},
1029         {ACMOVWGE, yml_rl, Pq, opBytes{0x4d}},
1030         {ACMOVWGT, yml_rl, Pq, opBytes{0x4f}},
1031         {ACMOVWHI, yml_rl, Pq, opBytes{0x47}},
1032         {ACMOVWLE, yml_rl, Pq, opBytes{0x4e}},
1033         {ACMOVWLS, yml_rl, Pq, opBytes{0x46}},
1034         {ACMOVWLT, yml_rl, Pq, opBytes{0x4c}},
1035         {ACMOVWMI, yml_rl, Pq, opBytes{0x48}},
1036         {ACMOVWNE, yml_rl, Pq, opBytes{0x45}},
1037         {ACMOVWOC, yml_rl, Pq, opBytes{0x41}},
1038         {ACMOVWOS, yml_rl, Pq, opBytes{0x40}},
1039         {ACMOVWPC, yml_rl, Pq, opBytes{0x4b}},
1040         {ACMOVWPL, yml_rl, Pq, opBytes{0x49}},
1041         {ACMOVWPS, yml_rl, Pq, opBytes{0x4a}},
1042         {ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}},
1043         {ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
1044         {ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}},
1045         {ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}},
1046         {ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
1047         {ACMPSB, ynone, Pb, opBytes{0xa6}},
1048         {ACMPSD, yxcmpi, Px, opBytes{Pf2, 0xc2}},
1049         {ACMPSL, ynone, Px, opBytes{0xa7}},
1050         {ACMPSQ, ynone, Pw, opBytes{0xa7}},
1051         {ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}},
1052         {ACMPSW, ynone, Pe, opBytes{0xa7}},
1053         {ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
1054         {ACOMISD, yxm, Pe, opBytes{0x2f}},
1055         {ACOMISS, yxm, Pm, opBytes{0x2f}},
1056         {ACPUID, ynone, Pm, opBytes{0xa2}},
1057         {ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}},
1058         {ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}},
1059         {ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}},
1060         {ACVTPD2PS, yxm, Pe, opBytes{0x5a}},
1061         {ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}},
1062         {ACVTPS2PD, yxm, Pm, opBytes{0x5a}},
1063         {ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}},
1064         {ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}},
1065         {ACVTSD2SS, yxm, Pf2, opBytes{0x5a}},
1066         {ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}},
1067         {ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}},
1068         {ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}},
1069         {ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}},
1070         {ACVTSS2SD, yxm, Pf3, opBytes{0x5a}},
1071         {ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}},
1072         {ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}},
1073         {ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}},
1074         {ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}},
1075         {ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}},
1076         {ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}},
1077         {ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}},
1078         {ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}},
1079         {ACWD, ynone, Pe, opBytes{0x99}},
1080         {ACWDE, ynone, Px, opBytes{0x98}},
1081         {ACQO, ynone, Pw, opBytes{0x99}},
1082         {ADAA, ynone, P32, opBytes{0x27}},
1083         {ADAS, ynone, P32, opBytes{0x2f}},
1084         {ADECB, yscond, Pb, opBytes{0xfe, 01}},
1085         {ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}},
1086         {ADECQ, yincq, Pw, opBytes{0xff, 01}},
1087         {ADECW, yincq, Pe, opBytes{0xff, 01}},
1088         {ADIVB, ydivb, Pb, opBytes{0xf6, 06}},
1089         {ADIVL, ydivl, Px, opBytes{0xf7, 06}},
1090         {ADIVPD, yxm, Pe, opBytes{0x5e}},
1091         {ADIVPS, yxm, Pm, opBytes{0x5e}},
1092         {ADIVQ, ydivl, Pw, opBytes{0xf7, 06}},
1093         {ADIVSD, yxm, Pf2, opBytes{0x5e}},
1094         {ADIVSS, yxm, Pf3, opBytes{0x5e}},
1095         {ADIVW, ydivl, Pe, opBytes{0xf7, 06}},
1096         {ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}},
1097         {ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}},
1098         {AEMMS, ynone, Pm, opBytes{0x77}},
1099         {AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}},
1100         {AENTER, nil, 0, opBytes{}}, // botch
1101         {AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}},
1102         {AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}},
1103         {AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
1104         {AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
1105         {AHLT, ynone, Px, opBytes{0xf4}},
1106         {AIDIVB, ydivb, Pb, opBytes{0xf6, 07}},
1107         {AIDIVL, ydivl, Px, opBytes{0xf7, 07}},
1108         {AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}},
1109         {AIDIVW, ydivl, Pe, opBytes{0xf7, 07}},
1110         {AIMULB, ydivb, Pb, opBytes{0xf6, 05}},
1111         {AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
1112         {AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
1113         {AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
1114         {AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}},
1115         {AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}},
1116         {AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}},
1117         {AINB, yin, Pb, opBytes{0xe4, 0xec}},
1118         {AINW, yin, Pe, opBytes{0xe5, 0xed}},
1119         {AINL, yin, Px, opBytes{0xe5, 0xed}},
1120         {AINCB, yscond, Pb, opBytes{0xfe, 00}},
1121         {AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}},
1122         {AINCQ, yincq, Pw, opBytes{0xff, 00}},
1123         {AINCW, yincq, Pe, opBytes{0xff, 00}},
1124         {AINSB, ynone, Pb, opBytes{0x6c}},
1125         {AINSL, ynone, Px, opBytes{0x6d}},
1126         {AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}},
1127         {AINSW, ynone, Pe, opBytes{0x6d}},
1128         {AICEBP, ynone, Px, opBytes{0xf1}},
1129         {AINT, yint, Px, opBytes{0xcd}},
1130         {AINTO, ynone, P32, opBytes{0xce}},
1131         {AIRETL, ynone, Px, opBytes{0xcf}},
1132         {AIRETQ, ynone, Pw, opBytes{0xcf}},
1133         {AIRETW, ynone, Pe, opBytes{0xcf}},
1134         {AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}},
1135         {AJCS, yjcond, Px, opBytes{0x72, 0x82}},
1136         {AJCXZL, yloop, Px, opBytes{0xe3}},
1137         {AJCXZW, yloop, Px, opBytes{0xe3}},
1138         {AJCXZQ, yloop, Px, opBytes{0xe3}},
1139         {AJEQ, yjcond, Px, opBytes{0x74, 0x84}},
1140         {AJGE, yjcond, Px, opBytes{0x7d, 0x8d}},
1141         {AJGT, yjcond, Px, opBytes{0x7f, 0x8f}},
1142         {AJHI, yjcond, Px, opBytes{0x77, 0x87}},
1143         {AJLE, yjcond, Px, opBytes{0x7e, 0x8e}},
1144         {AJLS, yjcond, Px, opBytes{0x76, 0x86}},
1145         {AJLT, yjcond, Px, opBytes{0x7c, 0x8c}},
1146         {AJMI, yjcond, Px, opBytes{0x78, 0x88}},
1147         {obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}},
1148         {AJNE, yjcond, Px, opBytes{0x75, 0x85}},
1149         {AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}},
1150         {AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}},
1151         {AJPC, yjcond, Px, opBytes{0x7b, 0x8b}},
1152         {AJPL, yjcond, Px, opBytes{0x79, 0x89}},
1153         {AJPS, yjcond, Px, opBytes{0x7a, 0x8a}},
1154         {AHADDPD, yxm, Pq, opBytes{0x7c}},
1155         {AHADDPS, yxm, Pf2, opBytes{0x7c}},
1156         {AHSUBPD, yxm, Pq, opBytes{0x7d}},
1157         {AHSUBPS, yxm, Pf2, opBytes{0x7d}},
1158         {ALAHF, ynone, Px, opBytes{0x9f}},
1159         {ALARL, yml_rl, Pm, opBytes{0x02}},
1160         {ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}},
1161         {ALARW, yml_rl, Pq, opBytes{0x02}},
1162         {ALDDQU, ylddqu, Pf2, opBytes{0xf0}},
1163         {ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}},
1164         {ALEAL, ym_rl, Px, opBytes{0x8d}},
1165         {ALEAQ, ym_rl, Pw, opBytes{0x8d}},
1166         {ALEAVEL, ynone, P32, opBytes{0xc9}},
1167         {ALEAVEQ, ynone, Py, opBytes{0xc9}},
1168         {ALEAVEW, ynone, Pe, opBytes{0xc9}},
1169         {ALEAW, ym_rl, Pe, opBytes{0x8d}},
1170         {ALOCK, ynone, Px, opBytes{0xf0}},
1171         {ALODSB, ynone, Pb, opBytes{0xac}},
1172         {ALODSL, ynone, Px, opBytes{0xad}},
1173         {ALODSQ, ynone, Pw, opBytes{0xad}},
1174         {ALODSW, ynone, Pe, opBytes{0xad}},
1175         {ALONG, ybyte, Px, opBytes{4}},
1176         {ALOOP, yloop, Px, opBytes{0xe2}},
1177         {ALOOPEQ, yloop, Px, opBytes{0xe1}},
1178         {ALOOPNE, yloop, Px, opBytes{0xe0}},
1179         {ALTR, ydivl, Pm, opBytes{0x00, 03}},
1180         {ALZCNTL, yml_rl, Pf3, opBytes{0xbd}},
1181         {ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}},
1182         {ALZCNTW, yml_rl, Pef3, opBytes{0xbd}},
1183         {ALSLL, yml_rl, Pm, opBytes{0x03}},
1184         {ALSLW, yml_rl, Pq, opBytes{0x03}},
1185         {ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}},
1186         {AMASKMOVOU, yxr, Pe, opBytes{0xf7}},
1187         {AMASKMOVQ, ymr, Pm, opBytes{0xf7}},
1188         {AMAXPD, yxm, Pe, opBytes{0x5f}},
1189         {AMAXPS, yxm, Pm, opBytes{0x5f}},
1190         {AMAXSD, yxm, Pf2, opBytes{0x5f}},
1191         {AMAXSS, yxm, Pf3, opBytes{0x5f}},
1192         {AMINPD, yxm, Pe, opBytes{0x5d}},
1193         {AMINPS, yxm, Pm, opBytes{0x5d}},
1194         {AMINSD, yxm, Pf2, opBytes{0x5d}},
1195         {AMINSS, yxm, Pf3, opBytes{0x5d}},
1196         {AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}},
1197         {AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}},
1198         {AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}},
1199         {AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}},
1200         {AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}},
1201         {AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}},
1202         {AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}},
1203         {AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}},
1204         {AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}},
1205         {AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}},
1206         {AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}},
1207         {AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}},
1208         {AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}},
1209         {AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}},
1210         {AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}},
1211         {AMOVHLPS, yxr, Pm, opBytes{0x12}},
1212         {AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}},
1213         {AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}},
1214         {AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
1215         {AMOVLHPS, yxr, Pm, opBytes{0x16}},
1216         {AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}},
1217         {AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}},
1218         {AMOVLQSX, yml_rl, Pw, opBytes{0x63}},
1219         {AMOVLQZX, yml_rl, Px, opBytes{0x8b}},
1220         {AMOVMSKPD, yxrrl, Pq, opBytes{0x50}},
1221         {AMOVMSKPS, yxrrl, Pm, opBytes{0x50}},
1222         {AMOVNTO, yxr_ml, Pe, opBytes{0xe7}},
1223         {AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}},
1224         {AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}},
1225         {AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}},
1226         {AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}},
1227         {AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
1228         {AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}},
1229         {AMOVSB, ynone, Pb, opBytes{0xa4}},
1230         {AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}},
1231         {AMOVSL, ynone, Px, opBytes{0xa5}},
1232         {AMOVSQ, ynone, Pw, opBytes{0xa5}},
1233         {AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}},
1234         {AMOVSW, ynone, Pe, opBytes{0xa5}},
1235         {AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}},
1236         {AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}},
1237         {AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
1238         {AMOVWLSX, yml_rl, Pm, opBytes{0xbf}},
1239         {AMOVWLZX, yml_rl, Pm, opBytes{0xb7}},
1240         {AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}},
1241         {AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}},
1242         {AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}},
1243         {AMULB, ydivb, Pb, opBytes{0xf6, 04}},
1244         {AMULL, ydivl, Px, opBytes{0xf7, 04}},
1245         {AMULPD, yxm, Pe, opBytes{0x59}},
1246         {AMULPS, yxm, Ym, opBytes{0x59}}, // NOTE(review): Ym is the only non-P* value in the prefix column here; sibling rows (ADIVPS, ASUBPS, AXORPS, ...) use Pm — presumably a typo for Pm, confirm against the prefix handling before changing.
1247         {AMULQ, ydivl, Pw, opBytes{0xf7, 04}},
1248         {AMULSD, yxm, Pf2, opBytes{0x59}},
1249         {AMULSS, yxm, Pf3, opBytes{0x59}},
1250         {AMULW, ydivl, Pe, opBytes{0xf7, 04}},
1251         {ANEGB, yscond, Pb, opBytes{0xf6, 03}},
1252         {ANEGL, yscond, Px, opBytes{0xf7, 03}},
1253         {ANEGQ, yscond, Pw, opBytes{0xf7, 03}},
1254         {ANEGW, yscond, Pe, opBytes{0xf7, 03}},
1255         {obj.ANOP, ynop, Px, opBytes{0, 0}},
1256         {ANOTB, yscond, Pb, opBytes{0xf6, 02}},
1257         {ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
1258         {ANOTQ, yscond, Pw, opBytes{0xf7, 02}},
1259         {ANOTW, yscond, Pe, opBytes{0xf7, 02}},
1260         {AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}},
1261         {AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
1262         {AORPD, yxm, Pq, opBytes{0x56}},
1263         {AORPS, yxm, Pm, opBytes{0x56}},
1264         {AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
1265         {AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
1266         {AOUTB, yin, Pb, opBytes{0xe6, 0xee}},
1267         {AOUTL, yin, Px, opBytes{0xe7, 0xef}},
1268         {AOUTW, yin, Pe, opBytes{0xe7, 0xef}},
1269         {AOUTSB, ynone, Pb, opBytes{0x6e}},
1270         {AOUTSL, ynone, Px, opBytes{0x6f}},
1271         {AOUTSW, ynone, Pe, opBytes{0x6f}},
1272         {APABSB, yxm_q4, Pq4, opBytes{0x1c}},
1273         {APABSD, yxm_q4, Pq4, opBytes{0x1e}},
1274         {APABSW, yxm_q4, Pq4, opBytes{0x1d}},
1275         {APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}},
1276         {APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}},
1277         {APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}},
1278         {APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}},
1279         {APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}},
1280         {APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}},
1281         {APADDQ, yxm, Pe, opBytes{0xd4}},
1282         {APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}},
1283         {APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}},
1284         {APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}},
1285         {APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}},
1286         {APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}},
1287         {APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}},
1288         {APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}},
1289         {APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}},
1290         {APAUSE, ynone, Px, opBytes{0xf3, 0x90}},
1291         {APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}},
1292         {APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}},
1293         {APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}},
1294         {APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}},
1295         {APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}},
1296         {APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}},
1297         {APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}},
1298         {APCMPGTB, ymm, Py1, opBytes{0x64, Pe, 0x64}},
1299         {APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}},
1300         {APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}},
1301         {APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}},
1302         {APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}},
1303         {APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}},
1304         {APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}},
1305         {APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}},
1306         {APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}},
1307         {APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}},
1308         {APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
1309         {APHADDSW, yxm_q4, Pq4, opBytes{0x03}},
1310         {APHADDW, yxm_q4, Pq4, opBytes{0x01}},
1311         {APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}},
1312         {APHSUBD, yxm_q4, Pq4, opBytes{0x06}},
1313         {APHSUBSW, yxm_q4, Pq4, opBytes{0x07}},
1314         {APHSUBW, yxm_q4, Pq4, opBytes{0x05}},
1315         {APINSRW, yinsrw, Pq, opBytes{0xc4, 00}},
1316         {APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}},
1317         {APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}},
1318         {APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}},
1319         {APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}},
1320         {APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}},
1321         {APMAXSB, yxm_q4, Pq4, opBytes{0x3c}},
1322         {APMAXSD, yxm_q4, Pq4, opBytes{0x3d}},
1323         {APMAXSW, yxm, Pe, opBytes{0xee}},
1324         {APMAXUB, yxm, Pe, opBytes{0xde}},
1325         {APMAXUD, yxm_q4, Pq4, opBytes{0x3f}},
1326         {APMAXUW, yxm_q4, Pq4, opBytes{0x3e}},
1327         {APMINSB, yxm_q4, Pq4, opBytes{0x38}},
1328         {APMINSD, yxm_q4, Pq4, opBytes{0x39}},
1329         {APMINSW, yxm, Pe, opBytes{0xea}},
1330         {APMINUB, yxm, Pe, opBytes{0xda}},
1331         {APMINUD, yxm_q4, Pq4, opBytes{0x3b}},
1332         {APMINUW, yxm_q4, Pq4, opBytes{0x3a}},
1333         {APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}},
1334         {APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}},
1335         {APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}},
1336         {APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}},
1337         {APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}},
1338         {APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}},
1339         {APMOVSXWQ, yxm_q4, Pq4, opBytes{0x24}},
1340         {APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}},
1341         {APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}},
1342         {APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}},
1343         {APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}},
1344         {APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}},
1345         {APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}},
1346         {APMULDQ, yxm_q4, Pq4, opBytes{0x28}},
1347         {APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}},
1348         {APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}},
1349         {APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}},
1350         {APMULLD, yxm_q4, Pq4, opBytes{0x40}},
1351         {APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}},
1352         {APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}},
1353         {APOPAL, ynone, P32, opBytes{0x61}},
1354         {APOPAW, ynone, Pe, opBytes{0x61}},
1355         {APOPCNTW, yml_rl, Pef3, opBytes{0xb8}},
1356         {APOPCNTL, yml_rl, Pf3, opBytes{0xb8}},
1357         {APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}},
1358         {APOPFL, ynone, P32, opBytes{0x9d}},
1359         {APOPFQ, ynone, Py, opBytes{0x9d}},
1360         {APOPFW, ynone, Pe, opBytes{0x9d}},
1361         {APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}},
1362         {APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}},
1363         {APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}},
1364         {APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}},
1365         {APSADBW, yxm, Pq, opBytes{0xf6}},
1366         {APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}},
1367         {APSHUFL, yxshuf, Pq, opBytes{0x70, 00}},
1368         {APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}},
1369         {APSHUFW, ymshuf, Pm, opBytes{0x70, 00}},
1370         {APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}},
1371         {APSIGNB, yxm_q4, Pq4, opBytes{0x08}},
1372         {APSIGND, yxm_q4, Pq4, opBytes{0x0a}},
1373         {APSIGNW, yxm_q4, Pq4, opBytes{0x09}},
1374         {APSLLO, ypsdq, Pq, opBytes{0x73, 07}},
1375         {APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
1376         {APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
1377         {APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
1378         {APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
1379         {APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
1380         {APSRLO, ypsdq, Pq, opBytes{0x73, 03}},
1381         {APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
1382         {APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
1383         {APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
1384         {APSUBB, yxm, Pe, opBytes{0xf8}},
1385         {APSUBL, yxm, Pe, opBytes{0xfa}},
1386         {APSUBQ, yxm, Pe, opBytes{0xfb}},
1387         {APSUBSB, yxm, Pe, opBytes{0xe8}},
1388         {APSUBSW, yxm, Pe, opBytes{0xe9}},
1389         {APSUBUSB, yxm, Pe, opBytes{0xd8}},
1390         {APSUBUSW, yxm, Pe, opBytes{0xd9}},
1391         {APSUBW, yxm, Pe, opBytes{0xf9}},
1392         {APTEST, yxm_q4, Pq4, opBytes{0x17}},
1393         {APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}},
1394         {APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}},
1395         {APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}},
1396         {APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}},
1397         {APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}},
1398         {APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}},
1399         {APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}},
1400         {APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}},
1401         {APUSHAL, ynone, P32, opBytes{0x60}},
1402         {APUSHAW, ynone, Pe, opBytes{0x60}},
1403         {APUSHFL, ynone, P32, opBytes{0x9c}},
1404         {APUSHFQ, ynone, Py, opBytes{0x9c}},
1405         {APUSHFW, ynone, Pe, opBytes{0x9c}},
1406         {APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
1407         {APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
1408         {APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
1409         {APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}},
1410         {AQUAD, ybyte, Px, opBytes{8}},
1411         {ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}},
1412         {ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
1413         {ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
1414         {ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
1415         {ARCPPS, yxm, Pm, opBytes{0x53}},
1416         {ARCPSS, yxm, Pf3, opBytes{0x53}},
1417         {ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}},
1418         {ARCRL, yshl, Px, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
1419         {ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
1420         {ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
1421         {AREP, ynone, Px, opBytes{0xf3}},
1422         {AREPN, ynone, Px, opBytes{0xf2}},
1423         {obj.ARET, ynone, Px, opBytes{0xc3}},
1424         {ARETFW, yret, Pe, opBytes{0xcb, 0xca}},
1425         {ARETFL, yret, Px, opBytes{0xcb, 0xca}},
1426         {ARETFQ, yret, Pw, opBytes{0xcb, 0xca}},
1427         {AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}},
1428         {AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
1429         {AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
1430         {AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
1431         {ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}},
1432         {ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
1433         {ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
1434         {ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
1435         {ARSQRTPS, yxm, Pm, opBytes{0x52}},
1436         {ARSQRTSS, yxm, Pf3, opBytes{0x52}},
1437         {ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL
1438         {ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
1439         {ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1440         {ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1441         {ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1442         {ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}},
1443         {ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
1444         {ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
1445         {ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
1446         {ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}},
1447         {ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
1448         {ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
1449         {ASBBW, yaddl, Pe, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
1450         {ASCASB, ynone, Pb, opBytes{0xae}},
1451         {ASCASL, ynone, Px, opBytes{0xaf}},
1452         {ASCASQ, ynone, Pw, opBytes{0xaf}},
1453         {ASCASW, ynone, Pe, opBytes{0xaf}},
1454         {ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}},
1455         {ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}},
1456         {ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}},
1457         {ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}},
1458         {ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}},
1459         {ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}},
1460         {ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}},
1461         {ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}},
1462         {ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}},
1463         {ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}},
1464         {ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}},
1465         {ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}},
1466         {ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}},
1467         {ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}},
1468         {ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}},
1469         {ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}},
1470         {ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
1471         {ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1472         {ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1473         {ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1474         {ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}},
1475         {ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
1476         {ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
1477         {ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
1478         {ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}},
1479         {ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}},
1480         {ASQRTPD, yxm, Pe, opBytes{0x51}},
1481         {ASQRTPS, yxm, Pm, opBytes{0x51}},
1482         {ASQRTSD, yxm, Pf2, opBytes{0x51}},
1483         {ASQRTSS, yxm, Pf3, opBytes{0x51}},
1484         {ASTC, ynone, Px, opBytes{0xf9}},
1485         {ASTD, ynone, Px, opBytes{0xfd}},
1486         {ASTI, ynone, Px, opBytes{0xfb}},
1487         {ASTMXCSR, ysvrs_om, Pm, opBytes{0xae, 03, 0xae, 03}},
1488         {ASTOSB, ynone, Pb, opBytes{0xaa}},
1489         {ASTOSL, ynone, Px, opBytes{0xab}},
1490         {ASTOSQ, ynone, Pw, opBytes{0xab}},
1491         {ASTOSW, ynone, Pe, opBytes{0xab}},
1492         {ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}},
1493         {ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
1494         {ASUBPD, yxm, Pe, opBytes{0x5c}},
1495         {ASUBPS, yxm, Pm, opBytes{0x5c}},
1496         {ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
1497         {ASUBSD, yxm, Pf2, opBytes{0x5c}},
1498         {ASUBSS, yxm, Pf3, opBytes{0x5c}},
1499         {ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
1500         {ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}},
1501         {ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall
1502         {ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}},
1503         {ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
1504         {ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
1505         {ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
1506         {ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}},
1507         {obj.ATEXT, ytext, Px, opBytes{}},
1508         {AUCOMISD, yxm, Pe, opBytes{0x2e}},
1509         {AUCOMISS, yxm, Pm, opBytes{0x2e}},
1510         {AUNPCKHPD, yxm, Pe, opBytes{0x15}},
1511         {AUNPCKHPS, yxm, Pm, opBytes{0x15}},
1512         {AUNPCKLPD, yxm, Pe, opBytes{0x14}},
1513         {AUNPCKLPS, yxm, Pm, opBytes{0x14}},
1514         {AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}},
1515         {AVERR, ydivl, Pm, opBytes{0x00, 04}},
1516         {AVERW, ydivl, Pm, opBytes{0x00, 05}},
1517         {AWAIT, ynone, Px, opBytes{0x9b}},
1518         {AWORD, ybyte, Px, opBytes{2}},
1519         {AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}},
1520         {AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}},
1521         {AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}},
1522         {AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}},
1523         {AXLAT, ynone, Px, opBytes{0xd7}},
1524         {AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}},
1525         {AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
1526         {AXORPD, yxm, Pe, opBytes{0x57}},
1527         {AXORPS, yxm, Pm, opBytes{0x57}},
1528         {AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
1529         {AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
1530         {AFMOVB, yfmvx, Px, opBytes{0xdf, 04}},
1531         {AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}},
1532         {AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
1533         {AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}},
1534         {AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}},
1535         {AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}},
1536         {AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}},
1537         {AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}},
1538         {AFMOVV, yfmvx, Px, opBytes{0xdf, 05}},
1539         {AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}},
1540         {AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}},
1541         {AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}},
1542         {AFMOVX, yfmvx, Px, opBytes{0xdb, 05}},
1543         {AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}},
1544         {AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}},
1545         {AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}},
1546         {AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}},
1547         {AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}},
1548         {AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}},
1549         {AFCMOVB, yfcmv, Px, opBytes{0xda, 00}},
1550         {AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}},
1551         {AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}},
1552         {AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}},
1553         {AFCMOVE, yfcmv, Px, opBytes{0xda, 01}},
1554         {AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}},
1555         {AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}},
1556         {AFCMOVU, yfcmv, Px, opBytes{0xda, 03}},
1557         {AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}},
1558         {AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}},  // botch
1559         {AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch
1560         {AFCOMDPP, ycompp, Px, opBytes{0xde, 03}},
1561         {AFCOMF, yfmvx, Px, opBytes{0xd8, 02}},
1562         {AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}},
1563         {AFCOMI, yfcmv, Px, opBytes{0xdb, 06}},
1564         {AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}},
1565         {AFCOML, yfmvx, Px, opBytes{0xda, 02}},
1566         {AFCOMLP, yfmvx, Px, opBytes{0xda, 03}},
1567         {AFCOMW, yfmvx, Px, opBytes{0xde, 02}},
1568         {AFCOMWP, yfmvx, Px, opBytes{0xde, 03}},
1569         {AFUCOM, ycompp, Px, opBytes{0xdd, 04}},
1570         {AFUCOMI, ycompp, Px, opBytes{0xdb, 05}},
1571         {AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}},
1572         {AFUCOMP, ycompp, Px, opBytes{0xdd, 05}},
1573         {AFUCOMPP, ycompp, Px, opBytes{0xda, 13}},
1574         {AFADDDP, ycompp, Px, opBytes{0xde, 00}},
1575         {AFADDW, yfmvx, Px, opBytes{0xde, 00}},
1576         {AFADDL, yfmvx, Px, opBytes{0xda, 00}},
1577         {AFADDF, yfmvx, Px, opBytes{0xd8, 00}},
1578         {AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}},
1579         {AFMULDP, ycompp, Px, opBytes{0xde, 01}},
1580         {AFMULW, yfmvx, Px, opBytes{0xde, 01}},
1581         {AFMULL, yfmvx, Px, opBytes{0xda, 01}},
1582         {AFMULF, yfmvx, Px, opBytes{0xd8, 01}},
1583         {AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}},
1584         {AFSUBDP, ycompp, Px, opBytes{0xde, 05}},
1585         {AFSUBW, yfmvx, Px, opBytes{0xde, 04}},
1586         {AFSUBL, yfmvx, Px, opBytes{0xda, 04}},
1587         {AFSUBF, yfmvx, Px, opBytes{0xd8, 04}},
1588         {AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}},
1589         {AFSUBRDP, ycompp, Px, opBytes{0xde, 04}},
1590         {AFSUBRW, yfmvx, Px, opBytes{0xde, 05}},
1591         {AFSUBRL, yfmvx, Px, opBytes{0xda, 05}},
1592         {AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}},
1593         {AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}},
1594         {AFDIVDP, ycompp, Px, opBytes{0xde, 07}},
1595         {AFDIVW, yfmvx, Px, opBytes{0xde, 06}},
1596         {AFDIVL, yfmvx, Px, opBytes{0xda, 06}},
1597         {AFDIVF, yfmvx, Px, opBytes{0xd8, 06}},
1598         {AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}},
1599         {AFDIVRDP, ycompp, Px, opBytes{0xde, 06}},
1600         {AFDIVRW, yfmvx, Px, opBytes{0xde, 07}},
1601         {AFDIVRL, yfmvx, Px, opBytes{0xda, 07}},
1602         {AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}},
1603         {AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}},
1604         {AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}},
1605         {AFFREE, nil, 0, opBytes{}},
1606         {AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}},
1607         {AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}},
1608         {AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}},
1609         {AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}},
1610         {AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}},
1611         {AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}},
1612         {AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}},
1613         {AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}},
1614         {AFABS, ynone, Px, opBytes{0xd9, 0xe1}},
1615         {AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}},
1616         {AFBSTP, yclflush, Px, opBytes{0xdf, 06}},
1617         {AFCHS, ynone, Px, opBytes{0xd9, 0xe0}},
1618         {AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}},
1619         {AFCOS, ynone, Px, opBytes{0xd9, 0xff}},
1620         {AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}},
1621         {AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}},
1622         {AFINIT, ynone, Px, opBytes{0xdb, 0xe3}},
1623         {AFLD1, ynone, Px, opBytes{0xd9, 0xe8}},
1624         {AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}},
1625         {AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}},
1626         {AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}},
1627         {AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}},
1628         {AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}},
1629         {AFLDZ, ynone, Px, opBytes{0xd9, 0xee}},
1630         {AFNOP, ynone, Px, opBytes{0xd9, 0xd0}},
1631         {AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}},
1632         {AFPREM, ynone, Px, opBytes{0xd9, 0xf8}},
1633         {AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}},
1634         {AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}},
1635         {AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}},
1636         {AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}},
1637         {AFSIN, ynone, Px, opBytes{0xd9, 0xfe}},
1638         {AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}},
1639         {AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}},
1640         {AFTST, ynone, Px, opBytes{0xd9, 0xe4}},
1641         {AFXAM, ynone, Px, opBytes{0xd9, 0xe5}},
1642         {AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}},
1643         {AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}},
1644         {AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}},
1645         {ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}},
1646         {ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}},
1647         {ACMPXCHGW, yrl_ml, Pe, opBytes{0x0f, 0xb1}},
1648         {ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}},
1649         {ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}},
1650         {ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}},
1651         {AINVD, ynone, Pm, opBytes{0x08}},
1652         {AINVLPG, ydivb, Pm, opBytes{0x01, 07}},
1653         {AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}},
1654         {ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}},
1655         {AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}},
1656         {AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}},
1657         {AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}},
1658         {ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}},
1659         {ARDMSR, ynone, Pm, opBytes{0x32}},
1660         {ARDPMC, ynone, Pm, opBytes{0x33}},
1661         {ARDTSC, ynone, Pm, opBytes{0x31}},
1662         {ARSM, ynone, Pm, opBytes{0xaa}},
1663         {ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}},
1664         {ASYSRET, ynone, Pm, opBytes{0x07}},
1665         {AWBINVD, ynone, Pm, opBytes{0x09}},
1666         {AWRMSR, ynone, Pm, opBytes{0x30}},
1667         {AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}},
1668         {AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}},
1669         {AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}},
1670         {AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}},
1671         {AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}},
1672         {ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}},
1673         {ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
1674         {ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
1675         {ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
1676         {APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}},
1677         {APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}},
1678         {APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}},
1679         {APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}},
1680         {AMOVQL, yrl_ml, Px, opBytes{0x89}},
1681         {obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}},
1682         {AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}},
1683         {AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}},
1684         {AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}},
1685         {AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}},
1686         {AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}},
1687         {AAESKEYGENASSIST, yxshuf, Pq, opBytes{0x3a, 0xdf, 0}},
1688         {AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}},
1689         {AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}},
1690         {AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}},
1691         {AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}},
1692         {APSHUFD, yxshuf, Pq, opBytes{0x70, 0}},
1693         {APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}},
1694         {APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}},
1695         {APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}},
1696         {AMOVDDUP, yxm, Pf2, opBytes{0x12}},
1697         {AMOVSHDUP, yxm, Pf3, opBytes{0x16}},
1698         {AMOVSLDUP, yxm, Pf3, opBytes{0x12}},
1699         {ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}},
1700         {ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}},
1701         {AUD1, ynone, Pm, opBytes{0xb9, 0}},
1702         {AUD2, ynone, Pm, opBytes{0x0b, 0}},
1703         {AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}},
1704         {ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}},
1705         {ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}},
1706         {ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}},
1707         {ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}},
1708         {ALMSW, ydivl, Pm, opBytes{0x01, 06}},
1709         {ALLDT, ydivl, Pm, opBytes{0x00, 02}},
1710         {ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}},
1711         {ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}},
1712         {ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}},
1713         {ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}},
1714         {ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}},
1715         {AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}},
1716         {AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}},
1717         {AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}},
1718         {AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}},
1719         {AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}},
1720         {AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}},
1721         {AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}},
1722         {AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}},
1723         {AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}},
1724         {AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}},
1725         {AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 05}},
1726         {AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}},
1727         {ASGDT, yclflush, Pm, opBytes{0x01, 00}},
1728         {ASIDT, yclflush, Pm, opBytes{0x01, 01}},
1729         {ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}},
1730         {ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}},
1731         {ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}},
1732         {ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}},
1733         {ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}},
1734         {ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}},
1735         {ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}},
1736         {ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}},
1737         {ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}},
1738         {AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}},
1739         {AMOVBEWW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
1740         {AMOVBELL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
1741         {AMOVBEQQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}},
1742         {ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}},
1743         {ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}},
1744         {ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}},
1745         {ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}},
1746         {ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}},
1747         {ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}},
1748         {ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}},
1749         {ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}},
1750         {ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}},
1751         {ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}},
1752         {APBLENDVB, yblendvpd, Pq4, opBytes{0x10}},
1753         {ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}},
1754         {ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}},
1755         {ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}},
1756         {ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}},
1757         {ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}},
1758         {ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}},
1759         {ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}},
1760         {ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}},
1761         {ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}},
1762         {ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}},
1763         {ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}},
1764         {AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}},
1765         {AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}},
1766         {AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}},
1767         {AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}},
1768         {ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}},
1769         {ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}},
1770         {ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}},
1771         {ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}},
1772         {ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}},
1773         {ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}},
1774         {ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}},
1775         {ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}},
1776         {ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}},
1777
1778         {ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}},
1779         {ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}},
1780         {AXACQUIRE, ynone, Px, opBytes{0xf2}},
1781         {AXRELEASE, ynone, Px, opBytes{0xf3}},
1782         {AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}},
1783         {AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}},
1784         {AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}},
1785         {AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}},
1786         {AXGETBV, ynone, Pm, opBytes{01, 0xd0}},
1787         {obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}},
1788         {obj.APCDATA, ypcdata, Px, opBytes{0, 0}},
1789         {obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}},
1790         {obj.ADUFFZERO, yduff, Px, opBytes{0xe8}},
1791
1792         {obj.AEND, nil, 0, opBytes{}},
1793         {0, nil, 0, opBytes{}},
1794 }
1795
// opindex is the opcode lookup table: the slot at index (as & obj.AMask)
// points at the table entry describing opcode as. It is populated by
// instinit from avxOptab and optab; slots for opcodes that appear in
// neither table stay nil.
var opindex [(ALAST + 1) & obj.AMask]*Optab
1797
1798 // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
1799 // This happens on systems like Solaris that call .so functions instead of system calls.
1800 // It does not seem to be necessary for any other systems. This is probably working
1801 // around a Solaris-specific bug that should be fixed differently, but we don't know
1802 // what that bug is. And this does fix it.
1803 func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
1804         if ctxt.Headtype == objabi.Hsolaris {
1805                 // All the Solaris dynamic imports from libc.so begin with "libc_".
1806                 return strings.HasPrefix(s.Name, "libc_")
1807         }
1808         return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
1809 }
1810
// nop holds single-instruction no-ops of various lengths: entry i is a
// no-op occupying i+1 bytes, stored in the leading bytes of its array.
// The encodings were constructed by hand and disassembled with gdb to verify.
// See http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
//
// There is no 10-byte entry because Native Client rejects the repeated
// 0x66 prefix it would require:
//	{0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
var nop = [][16]uint8{
	{0x90},
	{0x66, 0x90},
	{0x0F, 0x1F, 0x00},
	{0x0F, 0x1F, 0x40, 0x00},
	{0x0F, 0x1F, 0x44, 0x00, 0x00},
	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
}

// fillnop writes n bytes of no-op instructions to the start of p.
// It emits the longest available no-op repeatedly and finishes with a
// shorter one for whatever remains. Nothing is written when n <= 0.
func fillnop(p []byte, n int) {
	longest := len(nop)
	for n > 0 {
		// Size of the next no-op: as long as possible without exceeding n.
		size := longest
		if n < size {
			size = n
		}
		seq := &nop[size-1]
		copy(p[:size], seq[:size])
		p = p[size:]
		n -= size
	}
}
1841
1842 func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
1843         s.Grow(int64(c) + int64(pad))
1844         fillnop(s.P[c:], int(pad))
1845         return c + pad
1846 }
1847
1848 func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
1849         if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
1850                 return l
1851         }
1852         return q
1853 }
1854
1855 // isJump returns whether p is a jump instruction.
1856 // It is used to ensure that no standalone or macro-fused jump will straddle
1857 // or end on a 32 byte boundary by inserting NOPs before the jumps.
1858 func isJump(p *obj.Prog) bool {
1859         return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL ||
1860                 p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO
1861 }
1862
1863 // lookForJCC returns the first real instruction starting from p, if that instruction is a conditional
1864 // jump. Otherwise, nil is returned.
1865 func lookForJCC(p *obj.Prog) *obj.Prog {
1866         // Skip any PCDATA, FUNCDATA or NOP instructions
1867         var q *obj.Prog
1868         for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link {
1869         }
1870
1871         if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL {
1872                 return nil
1873         }
1874
1875         switch q.As {
1876         case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI,
1877                 AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT:
1878         default:
1879                 return nil
1880         }
1881
1882         return q
1883 }
1884
1885 // fusedJump determines whether p can be fused with a subsequent conditional jump instruction.
1886 // If it can, we return true followed by the total size of the fused jump. If it can't, we return false.
1887 // Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2.
1888 func fusedJump(p *obj.Prog) (bool, uint8) {
1889         var fusedSize uint8
1890
1891         // The first instruction in a macro fused pair may be preceded by the LOCK prefix,
1892         // or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we
1893         // need to be careful to insert any padding before the locks rather than directly after them.
1894
1895         if p.As == AXRELEASE || p.As == AXACQUIRE {
1896                 fusedSize += p.Isize
1897                 for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
1898                 }
1899                 if p == nil {
1900                         return false, 0
1901                 }
1902         }
1903         if p.As == ALOCK {
1904                 fusedSize += p.Isize
1905                 for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
1906                 }
1907                 if p == nil {
1908                         return false, 0
1909                 }
1910         }
1911         cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW
1912
1913         cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ ||
1914                 p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp
1915
1916         testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW ||
1917                 p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW
1918
1919         incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW ||
1920                 p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW
1921
1922         if !cmpAddSub && !testAnd && !incDec {
1923                 return false, 0
1924         }
1925
1926         if !incDec {
1927                 var argOne obj.AddrType
1928                 var argTwo obj.AddrType
1929                 if cmp {
1930                         argOne = p.From.Type
1931                         argTwo = p.To.Type
1932                 } else {
1933                         argOne = p.To.Type
1934                         argTwo = p.From.Type
1935                 }
1936                 if argOne == obj.TYPE_REG {
1937                         if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM {
1938                                 return false, 0
1939                         }
1940                 } else if argOne == obj.TYPE_MEM {
1941                         if argTwo != obj.TYPE_REG {
1942                                 return false, 0
1943                         }
1944                 } else {
1945                         return false, 0
1946                 }
1947         }
1948
1949         fusedSize += p.Isize
1950         jmp := lookForJCC(p)
1951         if jmp == nil {
1952                 return false, 0
1953         }
1954
1955         fusedSize += jmp.Isize
1956
1957         if testAnd {
1958                 return true, fusedSize
1959         }
1960
1961         if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI ||
1962                 jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC {
1963                 return false, 0
1964         }
1965
1966         if cmpAddSub {
1967                 return true, fusedSize
1968         }
1969
1970         if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS {
1971                 return false, 0
1972         }
1973
1974         return true, fusedSize
1975 }
1976
1977 type padJumpsCtx int32
1978
1979 func makePjcCtx(ctxt *obj.Link) padJumpsCtx {
1980         // Disable jump padding on 32 bit builds by settting
1981         // padJumps to 0.
1982         if ctxt.Arch.Family == sys.I386 {
1983                 return padJumpsCtx(0)
1984         }
1985
1986         // Disable jump padding for hand written assembly code.
1987         if ctxt.IsAsm {
1988                 return padJumpsCtx(0)
1989         }
1990
1991         return padJumpsCtx(32)
1992 }
1993
1994 // padJump detects whether the instruction being assembled is a standalone or a macro-fused
1995 // jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does
1996 // not cross or end on a 32 byte boundary.
1997 func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 {
1998         if pjc == 0 {
1999                 return c
2000         }
2001
2002         var toPad int32
2003         fj, fjSize := fusedJump(p)
2004         mask := int32(pjc - 1)
2005         if fj {
2006                 if (c&mask)+int32(fjSize) >= int32(pjc) {
2007                         toPad = int32(pjc) - (c & mask)
2008                 }
2009         } else if isJump(p) {
2010                 if (c&mask)+int32(p.Isize) >= int32(pjc) {
2011                         toPad = int32(pjc) - (c & mask)
2012                 }
2013         }
2014         if toPad <= 0 {
2015                 return c
2016         }
2017
2018         return noppad(ctxt, s, c, toPad)
2019 }
2020
2021 // reAssemble is called if an instruction's size changes during assembly. If
2022 // it does and the instruction is a standalone or a macro-fused jump we need to
2023 // reassemble.
2024 func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool {
2025         if pjc == 0 {
2026                 return false
2027         }
2028
2029         fj, _ := fusedJump(p)
2030         return fj || isJump(p)
2031 }
2032
// nopPad records one run of NOP padding emitted by span6 during an assembly
// pass, so that a matching ANOP Prog can be spliced into the instruction
// list once assembly has converged.
type nopPad struct {
	p *obj.Prog // Instruction before the pad
	n int32     // Size of the pad
}
2037
// span6 assembles the instruction list of s (s.Func().Text) into machine
// code in s.P, assigning every Prog its Pc and Isize and setting s.Size to
// the final code length.
//
// It first rewrites a few pseudo-forms (self-targeting unresolved branches,
// AADJSP, retpoline calls/jumps), then assembles the whole list repeatedly:
// branches start out short and are widened when a displacement does not fit,
// and jump/loop padding is recomputed each pass. Assembly stops when a pass
// requests no re-assembly; more than 20 passes is treated as a fatal error.
// Afterwards the NOP padding of the last pass is spliced into the Prog list
// as ANOP instructions, and, when the one-instruction TLS access cannot be
// used, the TLS access sequence is marked as an unsafe point.
//
// span6 does nothing if s.P is already non-nil.
func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
	pjc := makePjcCtx(ctxt)

	// s already carries code bytes; do not assemble it again.
	if s.P != nil {
		return
	}

	// ycover[0] is set by instinit, so zero means the tables were never built.
	if ycover[0] == 0 {
		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
	}

	// Pass 1: normalize instructions before any encoding takes place.
	for p := s.Func().Text; p != nil; p = p.Link {
		// A branch without a resolved target is made to target itself.
		if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil {
			p.To.SetTarget(p)
		}
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			// Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive.
			// One exception: It is smaller to encode $-0x80 than $0x80.
			// For that case, flip the sign and the op:
			// Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'.
			switch v := p.From.Offset; {
			case v == 0:
				p.As = obj.ANOP
			case v == 0x80 || (v < 0 && v != -0x80):
				p.As = spadjop(ctxt, AADDL, AADDQ)
				p.From.Offset *= -1
			default:
				p.As = spadjop(ctxt, ASUBL, ASUBQ)
			}
		}
		// With retpolines enabled, an indirect CALL/JMP through a register is
		// redirected to the runtime.retpoline<reg> symbol. Indirect branches
		// through memory cannot be converted and are reported instead.
		if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) {
			if p.To.Type != obj.TYPE_REG {
				ctxt.Diag("non-retpoline-compatible: %v", p)
				continue
			}
			p.To.Type = obj.TYPE_BRANCH
			p.To.Name = obj.NAME_EXTERN
			p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg)))
			p.To.Reg = 0
			p.To.Offset = 0
		}
	}

	// Pass 2: count instructions and initialize the branch flags in Back.
	var count int64 // rough count of number of instructions
	for p := s.Func().Text; p != nil; p = p.Link {
		count++
		p.Back = branchShort // use short branches first time through
		// A target that already has branchShort set was visited earlier in
		// this walk (or is p itself), so the branch goes backwards and its
		// target is a loop head.
		// NOTE(review): relies on Back not having branchShort set on entry — confirm.
		if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) {
			p.Back |= branchBackwards
			q.Back |= branchLoopHead
		}
	}
	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction

	var ab AsmBuf
	var n int   // number of assembly passes performed so far
	var c int32 // current code offset within s.P
	errors := ctxt.Errors
	var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies)
	nrelocs0 := len(s.R)
	for {
		// This loop continues while there are reasons to re-assemble
		// whole block, like the presence of long forward jumps.
		reAssemble := false
		// Clear and drop the relocations produced by the previous pass.
		for i := range s.R[nrelocs0:] {
			s.R[nrelocs0+i] = obj.Reloc{}
		}
		s.R = s.R[:nrelocs0] // preserve marker relocations generated by the compiler
		s.P = s.P[:0]
		c = 0
		var pPrev *obj.Prog
		nops = nops[:0]
		for p := s.Func().Text; p != nil; p = p.Link {
			// c0 is the offset before any padding, so c > c0 below means
			// padding was emitted ahead of p.
			c0 := c
			c = pjc.padJump(ctxt, s, p, c)

			// Align loop heads to loopAlign, but only when that costs at most
			// maxLoopPad bytes.
			if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
				// pad with NOPs
				v := -c & (loopAlign - 1)

				if v <= maxLoopPad {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
					c += v
				}
			}

			p.Pc = int64(c)

			// process forward jumps to p
			for q := p.Rel; q != nil; q = q.Forwd {
				// Displacement from the end of the jump q to p.
				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
				if q.Back&branchShort != 0 {
					// Displacements above 127 do not fit the short form:
					// switch q to the long form and assemble again.
					if v > 127 {
						reAssemble = true
						q.Back ^= branchShort
					}

					// Patch the one-byte displacement, which sits at offset 2
					// for JCXZL and XBEGIN and at offset 1 otherwise.
					if q.As == AJCXZL || q.As == AXBEGIN {
						s.P[q.Pc+2] = byte(v)
					} else {
						s.P[q.Pc+1] = byte(v)
					}
				} else {
					// Long form: the 32-bit displacement is the last four
					// bytes of the instruction.
					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
				}
			}

			p.Rel = nil

			p.Pc = int64(c)
			ab.asmins(ctxt, s, p)
			m := ab.Len()
			if int(p.Isize) != m {
				p.Isize = uint8(m)
				if pjc.reAssemble(p) {
					// We need to re-assemble here to check for jumps and fused jumps
					// that span or end on 32 byte boundaries.
					reAssemble = true
				}
			}

			// Append the encoded bytes of p to the symbol's code.
			s.Grow(p.Pc + int64(m))
			copy(s.P[p.Pc:], ab.Bytes())
			// If there was padding, remember it.
			if pPrev != nil && !ctxt.IsAsm && c > c0 {
				nops = append(nops, nopPad{p: pPrev, n: c - c0})
			}
			c += int32(m)
			pPrev = p
		}

		// Guard against an assembly that never converges.
		n++
		if n > 20 {
			ctxt.Diag("span must be looping")
			log.Fatalf("loop")
		}
		if !reAssemble {
			break
		}
		// Do not keep re-assembling once new errors have been reported.
		if ctxt.Errors > errors {
			return
		}
	}
	// splice padding nops into Progs
	// (n here is the range variable and shadows the pass counter n above.)
	for _, n := range nops {
		pp := n.p
		np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)}
		pp.Link = np
	}

	s.Size = int64(c)

	// Disabled debugging dump of the assembled bytes and relocations.
	if false { /* debug['a'] > 1 */
		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
		var i int
		for i = 0; i < len(s.P); i++ {
			fmt.Printf(" %.2x", s.P[i])
			if i%16 == 15 {
				fmt.Printf("\n  %.6x", uint(i+1))
			}
		}

		if i%16 != 0 {
			fmt.Printf("\n")
		}

		for i := 0; i < len(s.R); i++ {
			r := &s.R[i]
			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
		}
	}

	// Mark nonpreemptible instruction sequences.
	// The 2-instruction TLS access sequence
	//	MOVQ TLS, BX
	//	MOVQ 0(BX)(TLS*1), BX
	// is not async preemptible, as if it is preempted and resumed on
	// a different thread, the TLS address may become invalid.
	if !CanUse1InsnTLS(ctxt) {
		useTLS := func(p *obj.Prog) bool {
			// Only need to mark the second instruction, which has
			// REG_TLS as Index. (It is okay to interrupt and restart
			// the first instruction.)
			return p.From.Index == REG_TLS
		}
		obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil)
	}
}
2229
2230 func instinit(ctxt *obj.Link) {
2231         if ycover[0] != 0 {
2232                 // Already initialized; stop now.
2233                 // This happens in the cmd/asm tests,
2234                 // each of which re-initializes the arch.
2235                 return
2236         }
2237
2238         switch ctxt.Headtype {
2239         case objabi.Hplan9:
2240                 plan9privates = ctxt.Lookup("_privates")
2241         }
2242
2243         for i := range avxOptab {
2244                 c := avxOptab[i].as
2245                 if opindex[c&obj.AMask] != nil {
2246                         ctxt.Diag("phase error in avxOptab: %d (%v)", i, c)
2247                 }
2248                 opindex[c&obj.AMask] = &avxOptab[i]
2249         }
2250         for i := 1; optab[i].as != 0; i++ {
2251                 c := optab[i].as
2252                 if opindex[c&obj.AMask] != nil {
2253                         ctxt.Diag("phase error in optab: %d (%v)", i, c)
2254                 }
2255                 opindex[c&obj.AMask] = &optab[i]
2256         }
2257
2258         for i := 0; i < Ymax; i++ {
2259                 ycover[i*Ymax+i] = 1
2260         }
2261
2262         ycover[Yi0*Ymax+Yu2] = 1
2263         ycover[Yi1*Ymax+Yu2] = 1
2264
2265         ycover[Yi0*Ymax+Yi8] = 1
2266         ycover[Yi1*Ymax+Yi8] = 1
2267         ycover[Yu2*Ymax+Yi8] = 1
2268         ycover[Yu7*Ymax+Yi8] = 1
2269
2270         ycover[Yi0*Ymax+Yu7] = 1
2271         ycover[Yi1*Ymax+Yu7] = 1
2272         ycover[Yu2*Ymax+Yu7] = 1
2273
2274         ycover[Yi0*Ymax+Yu8] = 1
2275         ycover[Yi1*Ymax+Yu8] = 1
2276         ycover[Yu2*Ymax+Yu8] = 1
2277         ycover[Yu7*Ymax+Yu8] = 1
2278
2279         ycover[Yi0*Ymax+Ys32] = 1
2280         ycover[Yi1*Ymax+Ys32] = 1
2281         ycover[Yu2*Ymax+Ys32] = 1
2282         ycover[Yu7*Ymax+Ys32] = 1
2283         ycover[Yu8*Ymax+Ys32] = 1
2284         ycover[Yi8*Ymax+Ys32] = 1
2285
2286         ycover[Yi0*Ymax+Yi32] = 1
2287         ycover[Yi1*Ymax+Yi32] = 1
2288         ycover[Yu2*Ymax+Yi32] = 1
2289         ycover[Yu7*Ymax+Yi32] = 1
2290         ycover[Yu8*Ymax+Yi32] = 1
2291         ycover[Yi8*Ymax+Yi32] = 1
2292         ycover[Ys32*Ymax+Yi32] = 1
2293
2294         ycover[Yi0*Ymax+Yi64] = 1
2295         ycover[Yi1*Ymax+Yi64] = 1
2296         ycover[Yu7*Ymax+Yi64] = 1
2297         ycover[Yu2*Ymax+Yi64] = 1
2298         ycover[Yu8*Ymax+Yi64] = 1
2299         ycover[Yi8*Ymax+Yi64] = 1
2300         ycover[Ys32*Ymax+Yi64] = 1
2301         ycover[Yi32*Ymax+Yi64] = 1
2302
2303         ycover[Yal*Ymax+Yrb] = 1
2304         ycover[Ycl*Ymax+Yrb] = 1
2305         ycover[Yax*Ymax+Yrb] = 1
2306         ycover[Ycx*Ymax+Yrb] = 1
2307         ycover[Yrx*Ymax+Yrb] = 1
2308         ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
2309
2310         ycover[Ycl*Ymax+Ycx] = 1
2311
2312         ycover[Yax*Ymax+Yrx] = 1
2313         ycover[Ycx*Ymax+Yrx] = 1
2314
2315         ycover[Yax*Ymax+Yrl] = 1
2316         ycover[Ycx*Ymax+Yrl] = 1
2317         ycover[Yrx*Ymax+Yrl] = 1
2318         ycover[Yrl32*Ymax+Yrl] = 1
2319
2320         ycover[Yf0*Ymax+Yrf] = 1
2321
2322         ycover[Yal*Ymax+Ymb] = 1
2323         ycover[Ycl*Ymax+Ymb] = 1
2324         ycover[Yax*Ymax+Ymb] = 1
2325         ycover[Ycx*Ymax+Ymb] = 1
2326         ycover[Yrx*Ymax+Ymb] = 1
2327         ycover[Yrb*Ymax+Ymb] = 1
2328         ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
2329         ycover[Ym*Ymax+Ymb] = 1
2330
2331         ycover[Yax*Ymax+Yml] = 1
2332         ycover[Ycx*Ymax+Yml] = 1
2333         ycover[Yrx*Ymax+Yml] = 1
2334         ycover[Yrl*Ymax+Yml] = 1
2335         ycover[Yrl32*Ymax+Yml] = 1
2336         ycover[Ym*Ymax+Yml] = 1
2337
2338         ycover[Yax*Ymax+Ymm] = 1
2339         ycover[Ycx*Ymax+Ymm] = 1
2340         ycover[Yrx*Ymax+Ymm] = 1
2341         ycover[Yrl*Ymax+Ymm] = 1
2342         ycover[Yrl32*Ymax+Ymm] = 1
2343         ycover[Ym*Ymax+Ymm] = 1
2344         ycover[Ymr*Ymax+Ymm] = 1
2345
2346         ycover[Yxr0*Ymax+Yxr] = 1
2347
2348         ycover[Ym*Ymax+Yxm] = 1
2349         ycover[Yxr0*Ymax+Yxm] = 1
2350         ycover[Yxr*Ymax+Yxm] = 1
2351
2352         ycover[Ym*Ymax+Yym] = 1
2353         ycover[Yyr*Ymax+Yym] = 1
2354
2355         ycover[Yxr0*Ymax+YxrEvex] = 1
2356         ycover[Yxr*Ymax+YxrEvex] = 1
2357
2358         ycover[Ym*Ymax+YxmEvex] = 1
2359         ycover[Yxr0*Ymax+YxmEvex] = 1
2360         ycover[Yxr*Ymax+YxmEvex] = 1
2361         ycover[YxrEvex*Ymax+YxmEvex] = 1
2362
2363         ycover[Yyr*Ymax+YyrEvex] = 1
2364
2365         ycover[Ym*Ymax+YymEvex] = 1
2366         ycover[Yyr*Ymax+YymEvex] = 1
2367         ycover[YyrEvex*Ymax+YymEvex] = 1
2368
2369         ycover[Ym*Ymax+Yzm] = 1
2370         ycover[Yzr*Ymax+Yzm] = 1
2371
2372         ycover[Yk0*Ymax+Yk] = 1
2373         ycover[Yknot0*Ymax+Yk] = 1
2374
2375         ycover[Yk0*Ymax+Ykm] = 1
2376         ycover[Yknot0*Ymax+Ykm] = 1
2377         ycover[Yk*Ymax+Ykm] = 1
2378         ycover[Ym*Ymax+Ykm] = 1
2379
2380         ycover[Yxvm*Ymax+YxvmEvex] = 1
2381
2382         ycover[Yyvm*Ymax+YyvmEvex] = 1
2383
2384         for i := 0; i < MAXREG; i++ {
2385                 reg[i] = -1
2386                 if i >= REG_AL && i <= REG_R15B {
2387                         reg[i] = (i - REG_AL) & 7
2388                         if i >= REG_SPB && i <= REG_DIB {
2389                                 regrex[i] = 0x40
2390                         }
2391                         if i >= REG_R8B && i <= REG_R15B {
2392                                 regrex[i] = Rxr | Rxx | Rxb
2393                         }
2394                 }
2395
2396                 if i >= REG_AH && i <= REG_BH {
2397                         reg[i] = 4 + ((i - REG_AH) & 7)
2398                 }
2399                 if i >= REG_AX && i <= REG_R15 {
2400                         reg[i] = (i - REG_AX) & 7
2401                         if i >= REG_R8 {
2402                                 regrex[i] = Rxr | Rxx | Rxb
2403                         }
2404                 }
2405
2406                 if i >= REG_F0 && i <= REG_F0+7 {
2407                         reg[i] = (i - REG_F0) & 7
2408                 }
2409                 if i >= REG_M0 && i <= REG_M0+7 {
2410                         reg[i] = (i - REG_M0) & 7
2411                 }
2412                 if i >= REG_K0 && i <= REG_K0+7 {
2413                         reg[i] = (i - REG_K0) & 7
2414                 }
2415                 if i >= REG_X0 && i <= REG_X0+15 {
2416                         reg[i] = (i - REG_X0) & 7
2417                         if i >= REG_X0+8 {
2418                                 regrex[i] = Rxr | Rxx | Rxb
2419                         }
2420                 }
2421                 if i >= REG_X16 && i <= REG_X16+15 {
2422                         reg[i] = (i - REG_X16) & 7
2423                         if i >= REG_X16+8 {
2424                                 regrex[i] = Rxr | Rxx | Rxb | RxrEvex
2425                         } else {
2426                                 regrex[i] = RxrEvex
2427                         }
2428                 }
2429                 if i >= REG_Y0 && i <= REG_Y0+15 {
2430                         reg[i] = (i - REG_Y0) & 7
2431                         if i >= REG_Y0+8 {
2432                                 regrex[i] = Rxr | Rxx | Rxb
2433                         }
2434                 }
2435                 if i >= REG_Y16 && i <= REG_Y16+15 {
2436                         reg[i] = (i - REG_Y16) & 7
2437                         if i >= REG_Y16+8 {
2438                                 regrex[i] = Rxr | Rxx | Rxb | RxrEvex
2439                         } else {
2440                                 regrex[i] = RxrEvex
2441                         }
2442                 }
2443                 if i >= REG_Z0 && i <= REG_Z0+15 {
2444                         reg[i] = (i - REG_Z0) & 7
2445                         if i > REG_Z0+7 {
2446                                 regrex[i] = Rxr | Rxx | Rxb
2447                         }
2448                 }
2449                 if i >= REG_Z16 && i <= REG_Z16+15 {
2450                         reg[i] = (i - REG_Z16) & 7
2451                         if i >= REG_Z16+8 {
2452                                 regrex[i] = Rxr | Rxx | Rxb | RxrEvex
2453                         } else {
2454                                 regrex[i] = RxrEvex
2455                         }
2456                 }
2457
2458                 if i >= REG_CR+8 && i <= REG_CR+15 {
2459                         regrex[i] = Rxr
2460                 }
2461         }
2462 }
2463
// isAndroid is true when the configured target OS (buildcfg.GOOS) is
// "android". prefixof consults it when picking the segment prefix for
// direct TLS references.
var isAndroid = buildcfg.GOOS == "android"
2465
2466 func prefixof(ctxt *obj.Link, a *obj.Addr) int {
2467         if a.Reg < REG_CS && a.Index < REG_CS { // fast path
2468                 return 0
2469         }
2470         if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
2471                 switch a.Reg {
2472                 case REG_CS:
2473                         return 0x2e
2474
2475                 case REG_DS:
2476                         return 0x3e
2477
2478                 case REG_ES:
2479                         return 0x26
2480
2481                 case REG_FS:
2482                         return 0x64
2483
2484                 case REG_GS:
2485                         return 0x65
2486
2487                 case REG_TLS:
2488                         // NOTE: Systems listed here should be only systems that
2489                         // support direct TLS references like 8(TLS) implemented as
2490                         // direct references from FS or GS. Systems that require
2491                         // the initial-exec model, where you load the TLS base into
2492                         // a register and then index from that register, do not reach
2493                         // this code and should not be listed.
2494                         if ctxt.Arch.Family == sys.I386 {
2495                                 switch ctxt.Headtype {
2496                                 default:
2497                                         if isAndroid {
2498                                                 return 0x65 // GS
2499                                         }
2500                                         log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
2501
2502                                 case objabi.Hdarwin,
2503                                         objabi.Hdragonfly,
2504                                         objabi.Hfreebsd,
2505                                         objabi.Hnetbsd,
2506                                         objabi.Hopenbsd:
2507                                         return 0x65 // GS
2508                                 }
2509                         }
2510
2511                         switch ctxt.Headtype {
2512                         default:
2513                                 log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
2514
2515                         case objabi.Hlinux:
2516                                 if isAndroid {
2517                                         return 0x64 // FS
2518                                 }
2519
2520                                 if ctxt.Flag_shared {
2521                                         log.Fatalf("unknown TLS base register for linux with -shared")
2522                                 } else {
2523                                         return 0x64 // FS
2524                                 }
2525
2526                         case objabi.Hdragonfly,
2527                                 objabi.Hfreebsd,
2528                                 objabi.Hnetbsd,
2529                                 objabi.Hopenbsd,
2530                                 objabi.Hsolaris:
2531                                 return 0x64 // FS
2532
2533                         case objabi.Hdarwin:
2534                                 return 0x65 // GS
2535                         }
2536                 }
2537         }
2538
2539         if ctxt.Arch.Family == sys.I386 {
2540                 if a.Index == REG_TLS && ctxt.Flag_shared {
2541                         // When building for inclusion into a shared library, an instruction of the form
2542                         //     MOVL off(CX)(TLS*1), AX
2543                         // becomes
2544                         //     mov %gs:off(%ecx), %eax
2545                         // which assumes that the correct TLS offset has been loaded into %ecx (today
2546                         // there is only one TLS variable -- g -- so this is OK). When not building for
2547                         // a shared library the instruction it becomes
2548                         //     mov 0x0(%ecx), %eax
2549                         // and a R_TLS_LE relocation, and so does not require a prefix.
2550                         return 0x65 // GS
2551                 }
2552                 return 0
2553         }
2554
2555         switch a.Index {
2556         case REG_CS:
2557                 return 0x2e
2558
2559         case REG_DS:
2560                 return 0x3e
2561
2562         case REG_ES:
2563                 return 0x26
2564
2565         case REG_TLS:
2566                 if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
2567                         // When building for inclusion into a shared library, an instruction of the form
2568                         //     MOV off(CX)(TLS*1), AX
2569                         // becomes
2570                         //     mov %fs:off(%rcx), %rax
2571                         // which assumes that the correct TLS offset has been loaded into %rcx (today
2572                         // there is only one TLS variable -- g -- so this is OK). When not building for
2573                         // a shared library the instruction does not require a prefix.
2574                         return 0x64
2575                 }
2576
2577         case REG_FS:
2578                 return 0x64
2579
2580         case REG_GS:
2581                 return 0x65
2582         }
2583
2584         return 0
2585 }
2586
2587 // oclassRegList returns multisource operand class for addr.
2588 func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int {
2589         // TODO(quasilyte): when oclass register case is refactored into
2590         // lookup table, use it here to get register kind more easily.
2591         // Helper functions like regIsXmm should go away too (they will become redundant).
2592
2593         regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 }
2594         regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 }
2595         regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 }
2596
2597         reg0, reg1 := decodeRegisterRange(addr.Offset)
2598         low := regIndex(int16(reg0))
2599         high := regIndex(int16(reg1))
2600
2601         if ctxt.Arch.Family == sys.I386 {
2602                 if low >= 8 || high >= 8 {
2603                         return Yxxx
2604                 }
2605         }
2606
2607         switch high - low {
2608         case 3:
2609                 switch {
2610                 case regIsXmm(reg0) && regIsXmm(reg1):
2611                         return YxrEvexMulti4
2612                 case regIsYmm(reg0) && regIsYmm(reg1):
2613                         return YyrEvexMulti4
2614                 case regIsZmm(reg0) && regIsZmm(reg1):
2615                         return YzrMulti4
2616                 default:
2617                         return Yxxx
2618                 }
2619         default:
2620                 return Yxxx
2621         }
2622 }
2623
2624 // oclassVMem returns V-mem (vector memory with VSIB) operand class.
2625 // For addr that is not V-mem returns (Yxxx, false).
2626 func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) {
2627         switch addr.Index {
2628         case REG_X0 + 0,
2629                 REG_X0 + 1,
2630                 REG_X0 + 2,
2631                 REG_X0 + 3,
2632                 REG_X0 + 4,
2633                 REG_X0 + 5,
2634                 REG_X0 + 6,
2635                 REG_X0 + 7:
2636                 return Yxvm, true
2637         case REG_X8 + 0,
2638                 REG_X8 + 1,
2639                 REG_X8 + 2,
2640                 REG_X8 + 3,
2641                 REG_X8 + 4,
2642                 REG_X8 + 5,
2643                 REG_X8 + 6,
2644                 REG_X8 + 7:
2645                 if ctxt.Arch.Family == sys.I386 {
2646                         return Yxxx, true
2647                 }
2648                 return Yxvm, true
2649         case REG_X16 + 0,
2650                 REG_X16 + 1,
2651                 REG_X16 + 2,
2652                 REG_X16 + 3,
2653                 REG_X16 + 4,
2654                 REG_X16 + 5,
2655                 REG_X16 + 6,
2656                 REG_X16 + 7,
2657                 REG_X16 + 8,
2658                 REG_X16 + 9,
2659                 REG_X16 + 10,
2660                 REG_X16 + 11,
2661                 REG_X16 + 12,
2662                 REG_X16 + 13,
2663                 REG_X16 + 14,
2664                 REG_X16 + 15:
2665                 if ctxt.Arch.Family == sys.I386 {
2666                         return Yxxx, true
2667                 }
2668                 return YxvmEvex, true
2669
2670         case REG_Y0 + 0,
2671                 REG_Y0 + 1,
2672                 REG_Y0 + 2,
2673                 REG_Y0 + 3,
2674                 REG_Y0 + 4,
2675                 REG_Y0 + 5,
2676                 REG_Y0 + 6,
2677                 REG_Y0 + 7:
2678                 return Yyvm, true
2679         case REG_Y8 + 0,
2680                 REG_Y8 + 1,
2681                 REG_Y8 + 2,
2682                 REG_Y8 + 3,
2683                 REG_Y8 + 4,
2684                 REG_Y8 + 5,
2685                 REG_Y8 + 6,
2686                 REG_Y8 + 7:
2687                 if ctxt.Arch.Family == sys.I386 {
2688                         return Yxxx, true
2689                 }
2690                 return Yyvm, true
2691         case REG_Y16 + 0,
2692                 REG_Y16 + 1,
2693                 REG_Y16 + 2,
2694                 REG_Y16 + 3,
2695                 REG_Y16 + 4,
2696                 REG_Y16 + 5,
2697                 REG_Y16 + 6,
2698                 REG_Y16 + 7,
2699                 REG_Y16 + 8,
2700                 REG_Y16 + 9,
2701                 REG_Y16 + 10,
2702                 REG_Y16 + 11,
2703                 REG_Y16 + 12,
2704                 REG_Y16 + 13,
2705                 REG_Y16 + 14,
2706                 REG_Y16 + 15:
2707                 if ctxt.Arch.Family == sys.I386 {
2708                         return Yxxx, true
2709                 }
2710                 return YyvmEvex, true
2711
2712         case REG_Z0 + 0,
2713                 REG_Z0 + 1,
2714                 REG_Z0 + 2,
2715                 REG_Z0 + 3,
2716                 REG_Z0 + 4,
2717                 REG_Z0 + 5,
2718                 REG_Z0 + 6,
2719                 REG_Z0 + 7:
2720                 return Yzvm, true
2721         case REG_Z8 + 0,
2722                 REG_Z8 + 1,
2723                 REG_Z8 + 2,
2724                 REG_Z8 + 3,
2725                 REG_Z8 + 4,
2726                 REG_Z8 + 5,
2727                 REG_Z8 + 6,
2728                 REG_Z8 + 7,
2729                 REG_Z8 + 8,
2730                 REG_Z8 + 9,
2731                 REG_Z8 + 10,
2732                 REG_Z8 + 11,
2733                 REG_Z8 + 12,
2734                 REG_Z8 + 13,
2735                 REG_Z8 + 14,
2736                 REG_Z8 + 15,
2737                 REG_Z8 + 16,
2738                 REG_Z8 + 17,
2739                 REG_Z8 + 18,
2740                 REG_Z8 + 19,
2741                 REG_Z8 + 20,
2742                 REG_Z8 + 21,
2743                 REG_Z8 + 22,
2744                 REG_Z8 + 23:
2745                 if ctxt.Arch.Family == sys.I386 {
2746                         return Yxxx, true
2747                 }
2748                 return Yzvm, true
2749         }
2750
2751         return Yxxx, false
2752 }
2753
2754 func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
2755         switch a.Type {
2756         case obj.TYPE_REGLIST:
2757                 return oclassRegList(ctxt, a)
2758
2759         case obj.TYPE_NONE:
2760                 return Ynone
2761
2762         case obj.TYPE_BRANCH:
2763                 return Ybr
2764
2765         case obj.TYPE_INDIR:
2766                 if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
2767                         return Yindir
2768                 }
2769                 return Yxxx
2770
2771         case obj.TYPE_MEM:
2772                 // Pseudo registers have negative index, but SP is
2773                 // not pseudo on x86, hence REG_SP check is not redundant.
2774                 if a.Index == REG_SP || a.Index < 0 {
2775                         // Can't use FP/SB/PC/SP as the index register.
2776                         return Yxxx
2777                 }
2778
2779                 if vmem, ok := oclassVMem(ctxt, a); ok {
2780                         return vmem
2781                 }
2782
2783                 if ctxt.Arch.Family == sys.AMD64 {
2784                         switch a.Name {
2785                         case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
2786                                 // Global variables can't use index registers and their
2787                                 // base register is %rip (%rip is encoded as REG_NONE).
2788                                 if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
2789                                         return Yxxx
2790                                 }
2791                         case obj.NAME_AUTO, obj.NAME_PARAM:
2792                                 // These names must have a base of SP.  The old compiler
2793                                 // uses 0 for the base register. SSA uses REG_SP.
2794                                 if a.Reg != REG_SP && a.Reg != 0 {
2795                                         return Yxxx
2796                                 }
2797                         case obj.NAME_NONE:
2798                                 // everything is ok
2799                         default:
2800                                 // unknown name
2801                                 return Yxxx
2802                         }
2803                 }
2804                 return Ym
2805
2806         case obj.TYPE_ADDR:
2807                 switch a.Name {
2808                 case obj.NAME_GOTREF:
2809                         ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
2810                         return Yxxx
2811
2812                 case obj.NAME_EXTERN,
2813                         obj.NAME_STATIC:
2814                         if a.Sym != nil && useAbs(ctxt, a.Sym) {
2815                                 return Yi32
2816                         }
2817                         return Yiauto // use pc-relative addressing
2818
2819                 case obj.NAME_AUTO,
2820                         obj.NAME_PARAM:
2821                         return Yiauto
2822                 }
2823
2824                 // TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
2825                 // and got Yi32 in an earlier version of this code.
2826                 // Keep doing that until we fix yduff etc.
2827                 if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
2828                         return Yi32
2829                 }
2830
2831                 if a.Sym != nil || a.Name != obj.NAME_NONE {
2832                         ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
2833                 }
2834                 fallthrough
2835
2836         case obj.TYPE_CONST:
2837                 if a.Sym != nil {
2838                         ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
2839                 }
2840
2841                 v := a.Offset
2842                 if ctxt.Arch.Family == sys.I386 {
2843                         v = int64(int32(v))
2844                 }
2845                 switch {
2846                 case v == 0:
2847                         return Yi0
2848                 case v == 1:
2849                         return Yi1
2850                 case v >= 0 && v <= 3:
2851                         return Yu2
2852                 case v >= 0 && v <= 127:
2853                         return Yu7
2854                 case v >= 0 && v <= 255:
2855                         return Yu8
2856                 case v >= -128 && v <= 127:
2857                         return Yi8
2858                 }
2859                 if ctxt.Arch.Family == sys.I386 {
2860                         return Yi32
2861                 }
2862                 l := int32(v)
2863                 if int64(l) == v {
2864                         return Ys32 // can sign extend
2865                 }
2866                 if v>>32 == 0 {
2867                         return Yi32 // unsigned
2868                 }
2869                 return Yi64
2870
2871         case obj.TYPE_TEXTSIZE:
2872                 return Ytextsize
2873         }
2874
2875         if a.Type != obj.TYPE_REG {
2876                 ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
2877                 return Yxxx
2878         }
2879
2880         switch a.Reg {
2881         case REG_AL:
2882                 return Yal
2883
2884         case REG_AX:
2885                 return Yax
2886
2887                 /*
2888                         case REG_SPB:
2889                 */
2890         case REG_BPB,
2891                 REG_SIB,
2892                 REG_DIB,
2893                 REG_R8B,
2894                 REG_R9B,
2895                 REG_R10B,
2896                 REG_R11B,
2897                 REG_R12B,
2898                 REG_R13B,
2899                 REG_R14B,
2900                 REG_R15B:
2901                 if ctxt.Arch.Family == sys.I386 {
2902                         return Yxxx
2903                 }
2904                 fallthrough
2905
2906         case REG_DL,
2907                 REG_BL,
2908                 REG_AH,
2909                 REG_CH,
2910                 REG_DH,
2911                 REG_BH:
2912                 return Yrb
2913
2914         case REG_CL:
2915                 return Ycl
2916
2917         case REG_CX:
2918                 return Ycx
2919
2920         case REG_DX, REG_BX:
2921                 return Yrx
2922
2923         case REG_R8, // not really Yrl
2924                 REG_R9,
2925                 REG_R10,
2926                 REG_R11,
2927                 REG_R12,
2928                 REG_R13,
2929                 REG_R14,
2930                 REG_R15:
2931                 if ctxt.Arch.Family == sys.I386 {
2932                         return Yxxx
2933                 }
2934                 fallthrough
2935
2936         case REG_SP, REG_BP, REG_SI, REG_DI:
2937                 if ctxt.Arch.Family == sys.I386 {
2938                         return Yrl32
2939                 }
2940                 return Yrl
2941
2942         case REG_F0 + 0:
2943                 return Yf0
2944
2945         case REG_F0 + 1,
2946                 REG_F0 + 2,
2947                 REG_F0 + 3,
2948                 REG_F0 + 4,
2949                 REG_F0 + 5,
2950                 REG_F0 + 6,
2951                 REG_F0 + 7:
2952                 return Yrf
2953
2954         case REG_M0 + 0,
2955                 REG_M0 + 1,
2956                 REG_M0 + 2,
2957                 REG_M0 + 3,
2958                 REG_M0 + 4,
2959                 REG_M0 + 5,
2960                 REG_M0 + 6,
2961                 REG_M0 + 7:
2962                 return Ymr
2963
2964         case REG_X0:
2965                 return Yxr0
2966
2967         case REG_X0 + 1,
2968                 REG_X0 + 2,
2969                 REG_X0 + 3,
2970                 REG_X0 + 4,
2971                 REG_X0 + 5,
2972                 REG_X0 + 6,
2973                 REG_X0 + 7,
2974                 REG_X0 + 8,
2975                 REG_X0 + 9,
2976                 REG_X0 + 10,
2977                 REG_X0 + 11,
2978                 REG_X0 + 12,
2979                 REG_X0 + 13,
2980                 REG_X0 + 14,
2981                 REG_X0 + 15:
2982                 return Yxr
2983
2984         case REG_X0 + 16,
2985                 REG_X0 + 17,
2986                 REG_X0 + 18,
2987                 REG_X0 + 19,
2988                 REG_X0 + 20,
2989                 REG_X0 + 21,
2990                 REG_X0 + 22,
2991                 REG_X0 + 23,
2992                 REG_X0 + 24,
2993                 REG_X0 + 25,
2994                 REG_X0 + 26,
2995                 REG_X0 + 27,
2996                 REG_X0 + 28,
2997                 REG_X0 + 29,
2998                 REG_X0 + 30,
2999                 REG_X0 + 31:
3000                 return YxrEvex
3001
3002         case REG_Y0 + 0,
3003                 REG_Y0 + 1,
3004                 REG_Y0 + 2,
3005                 REG_Y0 + 3,
3006                 REG_Y0 + 4,
3007                 REG_Y0 + 5,
3008                 REG_Y0 + 6,
3009                 REG_Y0 + 7,
3010                 REG_Y0 + 8,
3011                 REG_Y0 + 9,
3012                 REG_Y0 + 10,
3013                 REG_Y0 + 11,
3014                 REG_Y0 + 12,
3015                 REG_Y0 + 13,
3016                 REG_Y0 + 14,
3017                 REG_Y0 + 15:
3018                 return Yyr
3019
3020         case REG_Y0 + 16,
3021                 REG_Y0 + 17,
3022                 REG_Y0 + 18,
3023                 REG_Y0 + 19,
3024                 REG_Y0 + 20,
3025                 REG_Y0 + 21,
3026                 REG_Y0 + 22,
3027                 REG_Y0 + 23,
3028                 REG_Y0 + 24,
3029                 REG_Y0 + 25,
3030                 REG_Y0 + 26,
3031                 REG_Y0 + 27,
3032                 REG_Y0 + 28,
3033                 REG_Y0 + 29,
3034                 REG_Y0 + 30,
3035                 REG_Y0 + 31:
3036                 return YyrEvex
3037
3038         case REG_Z0 + 0,
3039                 REG_Z0 + 1,
3040                 REG_Z0 + 2,
3041                 REG_Z0 + 3,
3042                 REG_Z0 + 4,
3043                 REG_Z0 + 5,
3044                 REG_Z0 + 6,
3045                 REG_Z0 + 7:
3046                 return Yzr
3047
3048         case REG_Z0 + 8,
3049                 REG_Z0 + 9,
3050                 REG_Z0 + 10,
3051                 REG_Z0 + 11,
3052                 REG_Z0 + 12,
3053                 REG_Z0 + 13,
3054                 REG_Z0 + 14,
3055                 REG_Z0 + 15,
3056                 REG_Z0 + 16,
3057                 REG_Z0 + 17,
3058                 REG_Z0 + 18,
3059                 REG_Z0 + 19,
3060                 REG_Z0 + 20,
3061                 REG_Z0 + 21,
3062                 REG_Z0 + 22,
3063                 REG_Z0 + 23,
3064                 REG_Z0 + 24,
3065                 REG_Z0 + 25,
3066                 REG_Z0 + 26,
3067                 REG_Z0 + 27,
3068                 REG_Z0 + 28,
3069                 REG_Z0 + 29,
3070                 REG_Z0 + 30,
3071                 REG_Z0 + 31:
3072                 if ctxt.Arch.Family == sys.I386 {
3073                         return Yxxx
3074                 }
3075                 return Yzr
3076
3077         case REG_K0:
3078                 return Yk0
3079
3080         case REG_K0 + 1,
3081                 REG_K0 + 2,
3082                 REG_K0 + 3,
3083                 REG_K0 + 4,
3084                 REG_K0 + 5,
3085                 REG_K0 + 6,
3086                 REG_K0 + 7:
3087                 return Yknot0
3088
3089         case REG_CS:
3090                 return Ycs
3091         case REG_SS:
3092                 return Yss
3093         case REG_DS:
3094                 return Yds
3095         case REG_ES:
3096                 return Yes
3097         case REG_FS:
3098                 return Yfs
3099         case REG_GS:
3100                 return Ygs
3101         case REG_TLS:
3102                 return Ytls
3103
3104         case REG_GDTR:
3105                 return Ygdtr
3106         case REG_IDTR:
3107                 return Yidtr
3108         case REG_LDTR:
3109                 return Yldtr
3110         case REG_MSW:
3111                 return Ymsw
3112         case REG_TASK:
3113                 return Ytask
3114
3115         case REG_CR + 0:
3116                 return Ycr0
3117         case REG_CR + 1:
3118                 return Ycr1
3119         case REG_CR + 2:
3120                 return Ycr2
3121         case REG_CR + 3:
3122                 return Ycr3
3123         case REG_CR + 4:
3124                 return Ycr4
3125         case REG_CR + 5:
3126                 return Ycr5
3127         case REG_CR + 6:
3128                 return Ycr6
3129         case REG_CR + 7:
3130                 return Ycr7
3131         case REG_CR + 8:
3132                 return Ycr8
3133
3134         case REG_DR + 0:
3135                 return Ydr0
3136         case REG_DR + 1:
3137                 return Ydr1
3138         case REG_DR + 2:
3139                 return Ydr2
3140         case REG_DR + 3:
3141                 return Ydr3
3142         case REG_DR + 4:
3143                 return Ydr4
3144         case REG_DR + 5:
3145                 return Ydr5
3146         case REG_DR + 6:
3147                 return Ydr6
3148         case REG_DR + 7:
3149                 return Ydr7
3150
3151         case REG_TR + 0:
3152                 return Ytr0
3153         case REG_TR + 1:
3154                 return Ytr1
3155         case REG_TR + 2:
3156                 return Ytr2
3157         case REG_TR + 3:
3158                 return Ytr3
3159         case REG_TR + 4:
3160                 return Ytr4
3161         case REG_TR + 5:
3162                 return Ytr5
3163         case REG_TR + 6:
3164                 return Ytr6
3165         case REG_TR + 7:
3166                 return Ytr7
3167         }
3168
3169         return Yxxx
3170 }
3171
3172 // AsmBuf is a fixed-size byte buffer into which variable-length x86
3173 // instructions are assembled, plus the encoder state kept alongside the bytes.
//
// The Put* methods write at offset off and rely on Go's array bounds checks;
// they do no capacity checking of their own.
3174 type AsmBuf struct {
3175         buf      [100]byte // backing storage for the emitted bytes
3176         off      int // number of bytes written so far, i.e. the next write position
3177         rexflag  int // REX prefix bits OR-ed in while operands are encoded (see asmandsz)
3178         vexflag  bool // Per inst: true for VEX-encoded
3179         evexflag bool // Per inst: true for EVEX-encoded
3180         rep      bool // NOTE(review): presumably a pending REP prefix; set outside this section, confirm
3181         repn     bool // NOTE(review): presumably a pending REPN prefix; set outside this section, confirm
3182         lock     bool // NOTE(review): presumably a pending LOCK prefix; set outside this section, confirm
3183
3184         evex evexBits // Initialized when evexflag is true
3185 }
3186
3187 // Put1 appends one byte to the end of the buffer.
3188 func (ab *AsmBuf) Put1(x byte) {
3189         ab.buf[ab.off] = x
3190         ab.off++
3191 }
3192
3193 // Put2 appends two bytes to the end of the buffer.
3194 func (ab *AsmBuf) Put2(x, y byte) {
3195         ab.buf[ab.off+0] = x
3196         ab.buf[ab.off+1] = y
3197         ab.off += 2
3198 }
3199
3200 // Put3 appends three bytes to the end of the buffer.
3201 func (ab *AsmBuf) Put3(x, y, z byte) {
3202         ab.buf[ab.off+0] = x
3203         ab.buf[ab.off+1] = y
3204         ab.buf[ab.off+2] = z
3205         ab.off += 3
3206 }
3207
3208 // Put4 appends four bytes to the end of the buffer.
3209 func (ab *AsmBuf) Put4(x, y, z, w byte) {
3210         ab.buf[ab.off+0] = x
3211         ab.buf[ab.off+1] = y
3212         ab.buf[ab.off+2] = z
3213         ab.buf[ab.off+3] = w
3214         ab.off += 4
3215 }
3216
3217 // PutInt16 writes v into the buffer using little-endian encoding.
3218 func (ab *AsmBuf) PutInt16(v int16) {
3219         ab.buf[ab.off+0] = byte(v)
3220         ab.buf[ab.off+1] = byte(v >> 8)
3221         ab.off += 2
3222 }
3223
3224 // PutInt32 writes v into the buffer using little-endian encoding.
3225 func (ab *AsmBuf) PutInt32(v int32) {
3226         ab.buf[ab.off+0] = byte(v)
3227         ab.buf[ab.off+1] = byte(v >> 8)
3228         ab.buf[ab.off+2] = byte(v >> 16)
3229         ab.buf[ab.off+3] = byte(v >> 24)
3230         ab.off += 4
3231 }
3232
3233 // PutInt64 writes v into the buffer using little-endian encoding.
3234 func (ab *AsmBuf) PutInt64(v int64) {
3235         ab.buf[ab.off+0] = byte(v)
3236         ab.buf[ab.off+1] = byte(v >> 8)
3237         ab.buf[ab.off+2] = byte(v >> 16)
3238         ab.buf[ab.off+3] = byte(v >> 24)
3239         ab.buf[ab.off+4] = byte(v >> 32)
3240         ab.buf[ab.off+5] = byte(v >> 40)
3241         ab.buf[ab.off+6] = byte(v >> 48)
3242         ab.buf[ab.off+7] = byte(v >> 56)
3243         ab.off += 8
3244 }
3245
3246 // Put copies b into the buffer.
3247 func (ab *AsmBuf) Put(b []byte) {
3248         copy(ab.buf[ab.off:], b)
3249         ab.off += len(b)
3250 }
3251
3252 // PutOpBytesLit writes zero terminated sequence of bytes from op,
3253 // starting at specified offset (e.g. z counter value).
3254 // Trailing 0 is not written.
3255 //
3256 // Intended to be used for literal Z cases.
3257 // Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
3258 func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
3259         for int(op[offset]) != 0 {
3260                 ab.Put1(byte(op[offset]))
3261                 offset++
3262         }
3263 }
3264
3265 // Insert inserts b at offset i.
3266 func (ab *AsmBuf) Insert(i int, b byte) {
3267         ab.off++
3268         copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
3269         ab.buf[i] = b
3270 }
3271
3272 // Last returns the byte at the end of the buffer.
3273 func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }
3274
3275 // Len returns the length of the buffer.
3276 func (ab *AsmBuf) Len() int { return ab.off }
3277
3278 // Bytes returns the contents of the buffer.
3279 func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }
3280
3281 // Reset empties the buffer.
3282 func (ab *AsmBuf) Reset() { ab.off = 0 }
3283
3284 // At returns the byte at offset i.
3285 func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }
3286
3287 // asmidx emits the SIB byte for the given scale, index and base registers.
//
// The byte is built as (scale code)<<6 | (index number)<<3 | (base number),
// with register numbers taken from the reg table. SP is accepted as a base
// but not as an index; X/Y/Z registers are accepted only as an index.
// Registers from the R8/X8/Y8/Z8 groups and above are rejected when
// assembling for 386.
//
// On an unsupported scale, index or base, a diagnostic is reported and a
// zero byte is emitted instead.
3288 func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
3289         var i int // the SIB byte under construction
3290
3291         // X/Y index register is used in VSIB.
3292         switch index {
3293         default:
3294                 goto bad
3295
3296         case REG_NONE:
3297                 i = 4 << 3 // index field value used when there is no index register
3298                 goto bas // scale is not examined when there is no index
3299
3300         case REG_R8,
3301                 REG_R9,
3302                 REG_R10,
3303                 REG_R11,
3304                 REG_R12,
3305                 REG_R13,
3306                 REG_R14,
3307                 REG_R15,
3308                 REG_X8,
3309                 REG_X9,
3310                 REG_X10,
3311                 REG_X11,
3312                 REG_X12,
3313                 REG_X13,
3314                 REG_X14,
3315                 REG_X15,
3316                 REG_X16,
3317                 REG_X17,
3318                 REG_X18,
3319                 REG_X19,
3320                 REG_X20,
3321                 REG_X21,
3322                 REG_X22,
3323                 REG_X23,
3324                 REG_X24,
3325                 REG_X25,
3326                 REG_X26,
3327                 REG_X27,
3328                 REG_X28,
3329                 REG_X29,
3330                 REG_X30,
3331                 REG_X31,
3332                 REG_Y8,
3333                 REG_Y9,
3334                 REG_Y10,
3335                 REG_Y11,
3336                 REG_Y12,
3337                 REG_Y13,
3338                 REG_Y14,
3339                 REG_Y15,
3340                 REG_Y16,
3341                 REG_Y17,
3342                 REG_Y18,
3343                 REG_Y19,
3344                 REG_Y20,
3345                 REG_Y21,
3346                 REG_Y22,
3347                 REG_Y23,
3348                 REG_Y24,
3349                 REG_Y25,
3350                 REG_Y26,
3351                 REG_Y27,
3352                 REG_Y28,
3353                 REG_Y29,
3354                 REG_Y30,
3355                 REG_Y31,
3356                 REG_Z8,
3357                 REG_Z9,
3358                 REG_Z10,
3359                 REG_Z11,
3360                 REG_Z12,
3361                 REG_Z13,
3362                 REG_Z14,
3363                 REG_Z15,
3364                 REG_Z16,
3365                 REG_Z17,
3366                 REG_Z18,
3367                 REG_Z19,
3368                 REG_Z20,
3369                 REG_Z21,
3370                 REG_Z22,
3371                 REG_Z23,
3372                 REG_Z24,
3373                 REG_Z25,
3374                 REG_Z26,
3375                 REG_Z27,
3376                 REG_Z28,
3377                 REG_Z29,
3378                 REG_Z30,
3379                 REG_Z31:
3380                 if ctxt.Arch.Family == sys.I386 {
3381                         goto bad // these index registers are not accepted for 386
3382                 }
3383                 fallthrough // otherwise encode exactly like the registers below
3384
3385         case REG_AX,
3386                 REG_CX,
3387                 REG_DX,
3388                 REG_BX,
3389                 REG_BP,
3390                 REG_SI,
3391                 REG_DI,
3392                 REG_X0,
3393                 REG_X1,
3394                 REG_X2,
3395                 REG_X3,
3396                 REG_X4,
3397                 REG_X5,
3398                 REG_X6,
3399                 REG_X7,
3400                 REG_Y0,
3401                 REG_Y1,
3402                 REG_Y2,
3403                 REG_Y3,
3404                 REG_Y4,
3405                 REG_Y5,
3406                 REG_Y6,
3407                 REG_Y7,
3408                 REG_Z0,
3409                 REG_Z1,
3410                 REG_Z2,
3411                 REG_Z3,
3412                 REG_Z4,
3413                 REG_Z5,
3414                 REG_Z6,
3415                 REG_Z7:
3416                 i = reg[index] << 3 // index register number goes in bits 3-5
3417         }
3418
        // Scale factors 1, 2, 4 and 8 are encoded as 0..3 in the top two bits.
3419         switch scale {
3420         default:
3421                 goto bad
3422
3423         case 1:
3424                 break
3425
3426         case 2:
3427                 i |= 1 << 6
3428
3429         case 4:
3430                 i |= 2 << 6
3431
3432         case 8:
3433                 i |= 3 << 6
3434         }
3435
3436 bas:
3437         switch base {
3438         default:
3439                 goto bad
3440
3441         case REG_NONE: // must be mod=00
3442                 i |= 5
3443
3444         case REG_R8,
3445                 REG_R9,
3446                 REG_R10,
3447                 REG_R11,
3448                 REG_R12,
3449                 REG_R13,
3450                 REG_R14,
3451                 REG_R15:
3452                 if ctxt.Arch.Family == sys.I386 {
3453                         goto bad // R8-R15 are not accepted as a base for 386
3454                 }
3455                 fallthrough
3456
3457         case REG_AX,
3458                 REG_CX,
3459                 REG_DX,
3460                 REG_BX,
3461                 REG_SP,
3462                 REG_BP,
3463                 REG_SI,
3464                 REG_DI:
3465                 i |= reg[base] // base register number goes in the low bits
3466         }
3467
3468         ab.Put1(byte(i))
3469         return
3470
3471 bad:
        // Report the operand and still emit one byte in place of the SIB byte.
3472         ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
3473         ab.Put1(0)
3474 }
3475
3476 func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
3477         var rel obj.Reloc
3478
3479         v := vaddr(ctxt, p, a, &rel)
3480         if rel.Siz != 0 {
3481                 if rel.Siz != 4 {
3482                         ctxt.Diag("bad reloc")
3483                 }
3484                 r := obj.Addrel(cursym)
3485                 *r = rel
3486                 r.Off = int32(p.Pc + int64(ab.Len()))
3487         }
3488
3489         ab.PutInt32(int32(v))
3490 }
3491
3492 func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
3493         if r != nil {
3494                 *r = obj.Reloc{}
3495         }
3496
3497         switch a.Name {
3498         case obj.NAME_STATIC,
3499                 obj.NAME_GOTREF,
3500                 obj.NAME_EXTERN:
3501                 s := a.Sym
3502                 if r == nil {
3503                         ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
3504                         log.Fatalf("reloc")
3505                 }
3506
3507                 if a.Name == obj.NAME_GOTREF {
3508                         r.Siz = 4
3509                         r.Type = objabi.R_GOTPCREL
3510                 } else if useAbs(ctxt, s) {
3511                         r.Siz = 4
3512                         r.Type = objabi.R_ADDR
3513                 } else {
3514                         r.Siz = 4
3515                         r.Type = objabi.R_PCREL
3516                 }
3517
3518                 r.Off = -1 // caller must fill in
3519                 r.Sym = s
3520                 r.Add = a.Offset
3521
3522                 return 0
3523         }
3524
3525         if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
3526                 if r == nil {
3527                         ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
3528                         log.Fatalf("reloc")
3529                 }
3530
3531                 if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
3532                         r.Type = objabi.R_TLS_LE
3533                         r.Siz = 4
3534                         r.Off = -1 // caller must fill in
3535                         r.Add = a.Offset
3536                 }
3537                 return 0
3538         }
3539
3540         return a.Offset
3541 }
3542
// asmandsz emits the ModRM byte for operand a, followed where needed by a
// SIB byte and an 8- or 32-bit displacement, and records a relocation on
// cursym when the displacement refers to a symbol or to TLS.
//
// r is placed in the ModRM reg field (bits 3-5). rex carries REX bits for
// that field; only 0x40|Rxr are kept, and the bits needed by a's own
// registers are OR-ed into ab.rexflag. m64 is not used by the body.
//
// Operands that cannot be encoded are reported with a diagnostic.
3543 func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
3544         var base int
3545         var rel obj.Reloc
3546
3547         rex &= 0x40 | Rxr
3548         if a.Offset != int64(int32(a.Offset)) {
3549                 // The rules are slightly different for 386 and AMD64,
3550                 // mostly for historical reasons. We may unify them later,
3551                 // but it must be discussed beforehand.
3552                 //
3553                 // For 64bit mode only LEAL is allowed to overflow.
3554                 // It's how https://golang.org/cl/59630 made it.
3555                 // crypto/sha1/sha1block_amd64.s depends on this feature.
3556                 //
3557                 // For 32bit mode rules are more permissive.
3558                 // If offset fits uint32, it's permitted.
3559                 // This is allowed for assembly that wants to use 32-bit hex
3560                 // constants, e.g. LEAL 0x99999999(AX), AX.
3561                 overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
3562                         (ctxt.Arch.Family != sys.AMD64 &&
3563                                 int64(uint32(a.Offset)) == a.Offset &&
3564                                 ab.rexflag&Rxw == 0)
3565                 if !overflowOK {
3566                         ctxt.Diag("offset too large in %s", p)
3567                 }
3568         }
3569         v := int32(a.Offset) // displacement candidate; truncated to 32 bits
3570         rel.Siz = 0
3571
3572         switch a.Type {
3573         case obj.TYPE_ADDR:
                // TYPE_ADDR operands always end at bad; the checks below only add
                // more specific diagnostics first.
3574                 if a.Name == obj.NAME_NONE {
3575                         ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
3576                 }
3577                 if a.Index == REG_TLS {
3578                         ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
3579                 }
3580                 goto bad
3581
3582         case obj.TYPE_REG:
3583                 const regFirst = REG_AL
3584                 const regLast = REG_Z31
3585                 if a.Reg < regFirst || regLast < a.Reg {
3586                         goto bad
3587                 }
3588                 if v != 0 {
3589                         goto bad
3590                 }
3591                 ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3)) // mod=3: register-direct operand
3592                 ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
3593                 return
3594         }
3595
3596         if a.Type != obj.TYPE_MEM {
3597                 goto bad
3598         }
3599
        // Memory operand with an index register: ModRM rm=4 plus a SIB byte.
3600         if a.Index != REG_NONE && a.Index != REG_TLS {
3601                 base := int(a.Reg) // note: a new variable that shadows the outer base inside this block
3602                 switch a.Name {
3603                 case obj.NAME_EXTERN,
3604                         obj.NAME_GOTREF,
3605                         obj.NAME_STATIC:
3606                         if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
3607                                 goto bad
3608                         }
3609                         if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
3610                                 // The base register has already been set. It holds the PC
3611                                 // of this instruction returned by a PC-reading thunk.
3612                                 // See obj6.go:rewriteToPcrel.
3613                         } else {
3614                                 base = REG_NONE
3615                         }
3616                         v = int32(vaddr(ctxt, p, a, &rel))
3617
3618                 case obj.NAME_AUTO,
3619                         obj.NAME_PARAM:
3620                         base = REG_SP
3621                 }
3622
3623                 ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
3624                 if base == REG_NONE {
3625                         ab.Put1(byte(0<<6 | 4<<0 | r<<3)) // mod=0, no base: a 32-bit displacement always follows
3626                         ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
3627                         goto putrelv
3628                 }
3629
                // mod=0 (no displacement) is not used with BP or R13 as base.
3630                 if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
3631                         ab.Put1(byte(0<<6 | 4<<0 | r<<3))
3632                         ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
3633                         return
3634                 }
3635
3636                 if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
3637                         ab.Put1(byte(1<<6 | 4<<0 | r<<3)) // mod=1: one-byte displacement
3638                         ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
3639                         ab.Put1(disp8)
3640                         return
3641                 }
3642
3643                 ab.Put1(byte(2<<6 | 4<<0 | r<<3)) // mod=2: four-byte displacement
3644                 ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
3645                 goto putrelv
3646         }
3647
        // Memory operand without an index register.
3648         base = int(a.Reg)
3649         switch a.Name {
3650         case obj.NAME_STATIC,
3651                 obj.NAME_GOTREF,
3652                 obj.NAME_EXTERN:
3653                 if a.Sym == nil {
3654                         ctxt.Diag("bad addr: %v", p)
3655                 }
3656                 if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
3657                         // The base register has already been set. It holds the PC
3658                         // of this instruction returned by a PC-reading thunk.
3659                         // See obj6.go:rewriteToPcrel.
3660                 } else {
3661                         base = REG_NONE
3662                 }
3663                 v = int32(vaddr(ctxt, p, a, &rel))
3664
3665         case obj.NAME_AUTO,
3666                 obj.NAME_PARAM:
3667                 base = REG_SP
3668         }
3669
3670         if base == REG_TLS {
3671                 v = int32(vaddr(ctxt, p, a, &rel))
3672         }
3673
3674         ab.rexflag |= regrex[base]&Rxb | rex
        // No usable base register (none, a segment register, or TLS): the
        // operand is encoded as a bare 32-bit displacement.
3675         if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
3676                 if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
3677                         if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
3678                                 ctxt.Diag("%v has offset against gotref", p)
3679                         }
3680                         ab.Put1(byte(0<<6 | 5<<0 | r<<3)) // mod=0, rm=5 followed by a 32-bit value
3681                         goto putrelv
3682                 }
3683
3684                 // temporary
3685                 ab.Put2(
3686                         byte(0<<6|4<<0|r<<3), // sib present
3687                         0<<6|4<<3|5<<0,       // DS:d32
3688                 )
3689                 goto putrelv
3690         }
3691
        // SP and R12 as base are always emitted together with a SIB byte
        // (index REG_NONE).
3692         if base == REG_SP || base == REG_R12 {
3693                 if v == 0 {
3694                         ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
3695                         ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
3696                         return
3697                 }
3698
3699                 if disp8, ok := toDisp8(v, p, ab); ok {
3700                         ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
3701                         ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
3702                         ab.Put1(disp8)
3703                         return
3704                 }
3705
3706                 ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
3707                 ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
3708                 goto putrelv
3709         }
3710
        // Any other general-purpose base register: plain ModRM, no SIB byte.
3711         if REG_AX <= base && base <= REG_R15 {
3712                 if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid {
                        // The offset moves into an R_TLS_LE relocation addend and the
                        // emitted displacement becomes 0.
3713                         rel = obj.Reloc{}
3714                         rel.Type = objabi.R_TLS_LE
3715                         rel.Siz = 4
3716                         rel.Sym = nil
3717                         rel.Add = int64(v)
3718                         v = 0
3719                 }
3720
3721                 if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
3722                         ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
3723                         return
3724                 }
3725
3726                 if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
3727                         ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
3728                         return
3729                 }
3730
3731                 ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
3732                 goto putrelv
3733         }
3734
3735         goto bad
3736
3737 putrelv:
        // Emit the 32-bit displacement, first recording the pending relocation
        // (if any) at the position the displacement is about to occupy.
3738         if rel.Siz != 0 {
3739                 if rel.Siz != 4 {
3740                         ctxt.Diag("bad rel")
3741                         goto bad
3742                 }
3743
3744                 r := obj.Addrel(cursym)
3745                 *r = rel
3746                 r.Off = int32(p.Pc + int64(ab.Len()))
3747         }
3748
3749         ab.PutInt32(v)
3750         return
3751
3752 bad:
3753         ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
3754 }
3755
3756 func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
3757         ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
3758 }
3759
3760 func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
3761         ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
3762 }
3763
3764 func bytereg(a *obj.Addr, t *uint8) {
3765         if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
3766                 a.Reg += REG_AL - REG_AX
3767                 *t = 0
3768         }
3769 }
3770
3771 func unbytereg(a *obj.Addr, t *uint8) {
3772         if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
3773                 a.Reg += REG_AX - REG_AL
3774                 *t = 0
3775         }
3776 }
3777
// Encoding codes used by ymovtab entries (the fifth value of each entry).
// NOTE(review): the code that interprets them is outside this section; the
// per-constant notes below only describe which ymovtab rows use each code.
3778 const (
3779         movLit uint8 = iota // Like Zlit
3780         movRegMem // rows storing a segment register to Yml ("mov seg")
3781         movMemReg // rows loading a segment register from Yml ("mov seg")
3782         movRegMem2op // rows with a two-byte opcode reading a special register (cr/dr/tr, sgdt, sidt, sldt, smsw, str)
3783         movMemReg2op // rows with a two-byte opcode writing a special register (cr/dr/tr, lgdt, lidt, lldt, lmsw, ltr)
3784         movFullPtr // Load full pointer, trash heap (unsupported)
3785         movDoubleShift // three-operand SHL/SHR rows ("double shift")
3786         movTLSReg // rows loading from Ytls into Yrl ("load TLS base")
3787 )
3788
// ymovtab lists special-case encodings: pushes and pops of segment
// registers, moves to and from segment, control, debug, test,
// descriptor-table, machine-status and task registers, double shifts, and
// loads of the TLS base.
//
// Each entry holds, in order: the instruction, three operand classes (Ynone
// marks an unused slot), one of the mov* encoding codes, and up to four
// bytes whose interpretation depends on that code.
// NOTE(review): the movtab field names and the code that consumes this table
// are outside this section; confirm the exact meaning of the byte array
// there. The all-zero final entry presumably terminates the table.
3789 var ymovtab = []movtab{
3790         // push
3791         {APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
3792         {APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
3793         {APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
3794         {APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
3795         {APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
3796         {APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
3797         {APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
3798         {APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
3799         {APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
3800         {APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
3801         {APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
3802         {APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
3803         {APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
3804         {APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},
3805
3806         // pop
3807         {APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
3808         {APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
3809         {APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
3810         {APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
3811         {APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
3812         {APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
3813         {APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
3814         {APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
3815         {APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
3816         {APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
3817         {APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
3818         {APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},
3819
3820         // mov seg
3821         {AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
3822         {AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
3823         {AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
3824         {AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
3825         {AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
3826         {AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
3827         {AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
3828         {AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
3829         {AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
3830         {AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
3831         {AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
3832         {AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},
3833
3834         // mov cr
3835         {AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
3836         {AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
3837         {AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
3838         {AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
3839         {AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
3840         {AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
3841         {AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
3842         {AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
3843         {AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
3844         {AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
3845         {AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
3846         {AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
3847         {AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
3848         {AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
3849         {AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
3850         {AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
3851         {AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
3852         {AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
3853         {AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
3854         {AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
3855
3856         // mov dr
3857         {AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
3858         {AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
3859         {AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
3860         {AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
3861         {AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
3862         {AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
3863         {AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
3864         {AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
3865         {AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
3866         {AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
3867         {AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
3868         {AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
3869         {AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
3870         {AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
3871         {AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
3872         {AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
3873
3874         // mov tr
3875         {AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
3876         {AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
        // NOTE(review): these two rows are the only ones with a non-zero fourth
        // byte (0xff); its meaning is not visible in this section.
3877         {AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
3878         {AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},
3879
3880         // lgdt, sgdt, lidt, sidt
3881         {AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
3882         {AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
3883         {AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
3884         {AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
3885         {AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
3886         {AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
3887         {AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
3888         {AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
3889
3890         // lldt, sldt
3891         {AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
3892         {AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},
3893
3894         // lmsw, smsw
3895         {AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
3896         {AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},
3897
3898         // ltr, str
3899         {AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
3900         {AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},
3901
3902         /* load full pointer - unsupported
3903         {AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
3904         {AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
3905         */
3906
3907         // double shift
3908         {ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
3909         {ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
3910         {ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
3911         {ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
3912         {ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
3913         {ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
3914         {ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
3915         {ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
3916         {ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
3917         {ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
3918         {ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
3919         {ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
3920         {ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
3921         {ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
3922         {ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
3923         {ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
3924         {ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
3925         {ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
3926
3927         // load TLS base
3928         {AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
3929         {AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
3930         {0, 0, 0, 0, 0, [4]uint8{}},
3931 }
3932
3933 func isax(a *obj.Addr) bool {
3934         switch a.Reg {
3935         case REG_AX, REG_AL, REG_AH:
3936                 return true
3937         }
3938
3939         if a.Index == REG_AX {
3940                 return true
3941         }
3942         return false
3943 }
3944
3945 func subreg(p *obj.Prog, from int, to int) {
3946         if false { /* debug['Q'] */
3947                 fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
3948         }
3949
3950         if int(p.From.Reg) == from {
3951                 p.From.Reg = int16(to)
3952                 p.Ft = 0
3953         }
3954
3955         if int(p.To.Reg) == from {
3956                 p.To.Reg = int16(to)
3957                 p.Tt = 0
3958         }
3959
3960         if int(p.From.Index) == from {
3961                 p.From.Index = int16(to)
3962                 p.Ft = 0
3963         }
3964
3965         if int(p.To.Index) == from {
3966                 p.To.Index = int16(to)
3967                 p.Tt = 0
3968         }
3969
3970         if false { /* debug['Q'] */
3971                 fmt.Printf("%v\n", p)
3972         }
3973 }
3974
3975 func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
3976         switch op {
3977         case Pm, Pe, Pf2, Pf3:
3978                 if osize != 1 {
3979                         if op != Pm {
3980                                 ab.Put1(byte(op))
3981                         }
3982                         ab.Put1(Pm)
3983                         z++
3984                         op = int(o.op[z])
3985                         break
3986                 }
3987                 fallthrough
3988
3989         default:
3990                 if ab.Len() == 0 || ab.Last() != Pm {
3991                         ab.Put1(Pm)
3992                 }
3993         }
3994
3995         ab.Put1(byte(op))
3996         return z
3997 }
3998
// Pre-encoded AMD64 instruction sequences that manipulate BP.
// NOTE(review): the names suggest they bracket Duff's-device calls to keep
// the frame pointer valid — confirm at the use sites.
var (
	// bpduff1 stores BP at -16(SP) and then points BP at that slot.
	bpduff1 = []byte{
		0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
		0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
	}

	// bpduff2 reloads BP from the slot it currently points at.
	bpduff2 = []byte{
		0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
	}
)
4007
4008 // asmevex emits EVEX pregis and opcode byte.
4009 // In addition to asmvex r/m, vvvv and reg fields also requires optional
4010 // K-masking register.
4011 //
4012 // Expects asmbuf.evex to be properly initialized.
4013 func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
4014         ab.evexflag = true
4015         evex := ab.evex
4016
4017         rexR := byte(1)
4018         evexR := byte(1)
4019         rexX := byte(1)
4020         rexB := byte(1)
4021         if r != nil {
4022                 if regrex[r.Reg]&Rxr != 0 {
4023                         rexR = 0 // "ModR/M.reg" selector 4th bit.
4024                 }
4025                 if regrex[r.Reg]&RxrEvex != 0 {
4026                         evexR = 0 // "ModR/M.reg" selector 5th bit.
4027                 }
4028         }
4029         if rm != nil {
4030                 if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
4031                         rexX = 0
4032                 } else if regrex[rm.Index]&Rxx != 0 {
4033                         rexX = 0
4034                 }
4035                 if regrex[rm.Reg]&Rxb != 0 {
4036                         rexB = 0
4037                 }
4038         }
4039         // P0 = [R][X][B][R'][00][mm]
4040         p0 := (rexR << 7) |
4041                 (rexX << 6) |
4042                 (rexB << 5) |
4043                 (evexR << 4) |
4044                 (0 << 2) |
4045                 (evex.M() << 0)
4046
4047         vexV := byte(0)
4048         if v != nil {
4049                 // 4bit-wide reg index.
4050                 vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
4051         }
4052         vexV ^= 0x0F
4053         // P1 = [W][vvvv][1][pp]
4054         p1 := (evex.W() << 7) |
4055                 (vexV << 3) |
4056                 (1 << 2) |
4057                 (evex.P() << 0)
4058
4059         suffix := evexSuffixMap[p.Scond]
4060         evexZ := byte(0)
4061         evexLL := evex.L()
4062         evexB := byte(0)
4063         evexV := byte(1)
4064         evexA := byte(0)
4065         if suffix.zeroing {
4066                 if !evex.ZeroingEnabled() {
4067                         ctxt.Diag("unsupported zeroing: %v", p)
4068                 }
4069                 evexZ = 1
4070         }
4071         switch {
4072         case suffix.rounding != rcUnset:
4073                 if rm != nil && rm.Type == obj.TYPE_MEM {
4074                         ctxt.Diag("illegal rounding with memory argument: %v", p)
4075                 } else if !evex.RoundingEnabled() {
4076                         ctxt.Diag("unsupported rounding: %v", p)
4077                 }
4078                 evexB = 1
4079                 evexLL = suffix.rounding
4080         case suffix.broadcast:
4081                 if rm == nil || rm.Type != obj.TYPE_MEM {
4082                         ctxt.Diag("illegal broadcast without memory argument: %v", p)
4083                 } else if !evex.BroadcastEnabled() {
4084                         ctxt.Diag("unsupported broadcast: %v", p)
4085                 }
4086                 evexB = 1
4087         case suffix.sae:
4088                 if rm != nil && rm.Type == obj.TYPE_MEM {
4089                         ctxt.Diag("illegal SAE with memory argument: %v", p)
4090                 } else if !evex.SaeEnabled() {
4091                         ctxt.Diag("unsupported SAE: %v", p)
4092                 }
4093                 evexB = 1
4094         }
4095         if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
4096                 evexV = 0
4097         } else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
4098                 evexV = 0 // VSR selector 5th bit.
4099         }
4100         if k != nil {
4101                 evexA = byte(reg[k.Reg])
4102         }
4103         // P2 = [z][L'L][b][V'][aaa]
4104         p2 := (evexZ << 7) |
4105                 (evexLL << 5) |
4106                 (evexB << 4) |
4107                 (evexV << 3) |
4108                 (evexA << 0)
4109
4110         const evexEscapeByte = 0x62
4111         ab.Put4(evexEscapeByte, p0, p1, p2)
4112         ab.Put1(evex.opcode)
4113 }
4114
4115 // Emit VEX prefix and opcode byte.
4116 // The three addresses are the r/m, vvvv, and reg fields.
4117 // The reg and rm arguments appear in the same order as the
4118 // arguments to asmand, which typically follows the call to asmvex.
4119 // The final two arguments are the VEX prefix (see encoding above)
4120 // and the opcode byte.
4121 // For details about vex prefix see:
4122 // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
4123 func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
4124         ab.vexflag = true
4125         rexR := 0
4126         if r != nil {
4127                 rexR = regrex[r.Reg] & Rxr
4128         }
4129         rexB := 0
4130         rexX := 0
4131         if rm != nil {
4132                 rexB = regrex[rm.Reg] & Rxb
4133                 rexX = regrex[rm.Index] & Rxx
4134         }
4135         vexM := (vex >> 3) & 0x7
4136         vexWLP := vex & 0x87
4137         vexV := byte(0)
4138         if v != nil {
4139                 vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
4140         }
4141         vexV ^= 0xF
4142         if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
4143                 // Can use 2-byte encoding.
4144                 ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
4145         } else {
4146                 // Must use 3-byte encoding.
4147                 ab.Put3(0xc4,
4148                         (byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
4149                         vexV<<3|vexWLP,
4150                 )
4151         }
4152         ab.Put1(opcode)
4153 }
4154
4155 // regIndex returns register index that fits in 5 bits.
4156 //
4157 //      R         : 3 bit | legacy instructions     | N/A
4158 //      [R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
4159 //      EVEX.R    : 1 bit | EVEX extension bit      | RxrEvex
4160 //
4161 // Examples:
4162 //      REG_Z30 => 30
4163 //      REG_X15 => 15
4164 //      REG_R9  => 9
4165 //      REG_AX  => 0
4166 //
4167 func regIndex(r int16) int {
4168         lower3bits := reg[r]
4169         high4bit := regrex[r] & Rxr << 1
4170         high5bit := regrex[r] & RxrEvex << 0
4171         return lower3bits | high4bit | high5bit
4172 }
4173
4174 // avx2gatherValid reports whether p satisfies AVX2 gather constraints.
4175 // Reports errors via ctxt.
4176 func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
4177         // If any pair of the index, mask, or destination registers
4178         // are the same, illegal instruction trap (#UD) is triggered.
4179         index := regIndex(p.GetFrom3().Index)
4180         mask := regIndex(p.From.Reg)
4181         dest := regIndex(p.To.Reg)
4182         if dest == mask || dest == index || mask == index {
4183                 ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
4184                 return false
4185         }
4186
4187         return true
4188 }
4189
4190 // avx512gatherValid reports whether p satisfies AVX512 gather constraints.
4191 // Reports errors via ctxt.
4192 func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
4193         // Illegal instruction trap (#UD) is triggered if the destination vector
4194         // register is the same as index vector in VSIB.
4195         index := regIndex(p.From.Index)
4196         dest := regIndex(p.To.Reg)
4197         if dest == index {
4198                 ctxt.Diag("index and destination registers should be distinct: %v", p)
4199                 return false
4200         }
4201
4202         return true
4203 }
4204
4205 func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
4206         o := opindex[p.As&obj.AMask]
4207
4208         if o == nil {
4209                 ctxt.Diag("asmins: missing op %v", p)
4210                 return
4211         }
4212
4213         if pre := prefixof(ctxt, &p.From); pre != 0 {
4214                 ab.Put1(byte(pre))
4215         }
4216         if pre := prefixof(ctxt, &p.To); pre != 0 {
4217                 ab.Put1(byte(pre))
4218         }
4219
4220         // Checks to warn about instruction/arguments combinations that
4221         // will unconditionally trigger illegal instruction trap (#UD).
4222         switch p.As {
4223         case AVGATHERDPD,
4224                 AVGATHERQPD,
4225                 AVGATHERDPS,
4226                 AVGATHERQPS,
4227                 AVPGATHERDD,
4228                 AVPGATHERQD,
4229                 AVPGATHERDQ,
4230                 AVPGATHERQQ:
4231                 // AVX512 gather requires explicit K mask.
4232                 if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
4233                         if !avx512gatherValid(ctxt, p) {
4234                                 return
4235                         }
4236                 } else {
4237                         if !avx2gatherValid(ctxt, p) {
4238                                 return
4239                         }
4240                 }
4241         }
4242
4243         if p.Ft == 0 {
4244                 p.Ft = uint8(oclass(ctxt, p, &p.From))
4245         }
4246         if p.Tt == 0 {
4247                 p.Tt = uint8(oclass(ctxt, p, &p.To))
4248         }
4249
4250         ft := int(p.Ft) * Ymax
4251         var f3t int
4252         tt := int(p.Tt) * Ymax
4253
4254         xo := obj.Bool2int(o.op[0] == 0x0f)
4255         z := 0
4256         var a *obj.Addr
4257         var l int
4258         var op int
4259         var q *obj.Prog
4260         var r *obj.Reloc
4261         var rel obj.Reloc
4262         var v int64
4263
4264         args := make([]int, 0, argListMax)
4265         if ft != Ynone*Ymax {
4266                 args = append(args, ft)
4267         }
4268         for i := range p.RestArgs {
4269                 args = append(args, oclass(ctxt, p, &p.RestArgs[i].Addr)*Ymax)
4270         }
4271         if tt != Ynone*Ymax {
4272                 args = append(args, tt)
4273         }
4274
4275         for _, yt := range o.ytab {
4276                 // ytab matching is purely args-based,
4277                 // but AVX512 suffixes like "Z" or "RU_SAE" will
4278                 // add EVEX-only filter that will reject non-EVEX matches.
4279                 //
4280                 // Consider "VADDPD.BCST 2032(DX), X0, X0".
4281                 // Without this rule, operands will lead to VEX-encoded form
4282                 // and produce "c5b15813" encoding.
4283                 if !yt.match(args) {
4284                         // "xo" is always zero for VEX/EVEX encoded insts.
4285                         z += int(yt.zoffset) + xo
4286                 } else {
4287                         if p.Scond != 0 && !evexZcase(yt.zcase) {
4288                                 // Do not signal error and continue to search
4289                                 // for matching EVEX-encoded form.
4290                                 z += int(yt.zoffset)
4291                                 continue
4292                         }
4293
4294                         switch o.prefix {
4295                         case Px1: // first option valid only in 32-bit mode
4296                                 if ctxt.Arch.Family == sys.AMD64 && z == 0 {
4297                                         z += int(yt.zoffset) + xo
4298                                         continue
4299                                 }
4300                         case Pq: // 16 bit escape and opcode escape
4301                                 ab.Put2(Pe, Pm)
4302
4303                         case Pq3: // 16 bit escape and opcode escape + REX.W
4304                                 ab.rexflag |= Pw
4305                                 ab.Put2(Pe, Pm)
4306
4307                         case Pq4: // 66 0F 38
4308                                 ab.Put3(0x66, 0x0F, 0x38)
4309
4310                         case Pq4w: // 66 0F 38 + REX.W
4311                                 ab.rexflag |= Pw
4312                                 ab.Put3(0x66, 0x0F, 0x38)
4313
4314                         case Pq5: // F3 0F 38
4315                                 ab.Put3(0xF3, 0x0F, 0x38)
4316
4317                         case Pq5w: //  F3 0F 38 + REX.W
4318                                 ab.rexflag |= Pw
4319                                 ab.Put3(0xF3, 0x0F, 0x38)
4320
4321                         case Pf2, // xmm opcode escape
4322                                 Pf3:
4323                                 ab.Put2(o.prefix, Pm)
4324
4325                         case Pef3:
4326                                 ab.Put3(Pe, Pf3, Pm)
4327
4328                         case Pfw: // xmm opcode escape + REX.W
4329                                 ab.rexflag |= Pw
4330                                 ab.Put2(Pf3, Pm)
4331
4332                         case Pm: // opcode escape
4333                                 ab.Put1(Pm)
4334
4335                         case Pe: // 16 bit escape
4336                                 ab.Put1(Pe)
4337
4338                         case Pw: // 64-bit escape
4339                                 if ctxt.Arch.Family != sys.AMD64 {
4340                                         ctxt.Diag("asmins: illegal 64: %v", p)
4341                                 }
4342                                 ab.rexflag |= Pw
4343
4344                         case Pw8: // 64-bit escape if z >= 8
4345                                 if z >= 8 {
4346                                         if ctxt.Arch.Family != sys.AMD64 {
4347                                                 ctxt.Diag("asmins: illegal 64: %v", p)
4348                                         }
4349                                         ab.rexflag |= Pw
4350                                 }
4351
4352                         case Pb: // botch
4353                                 if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
4354                                         goto bad
4355                                 }
4356                                 // NOTE(rsc): This is probably safe to do always,
4357                                 // but when enabled it chooses different encodings
4358                                 // than the old cmd/internal/obj/i386 code did,
4359                                 // which breaks our "same bits out" checks.
4360                                 // In particular, CMPB AX, $0 encodes as 80 f8 00
4361                                 // in the original obj/i386, and it would encode
4362                                 // (using a valid, shorter form) as 3c 00 if we enabled
4363                                 // the call to bytereg here.
4364                                 if ctxt.Arch.Family == sys.AMD64 {
4365                                         bytereg(&p.From, &p.Ft)
4366                                         bytereg(&p.To, &p.Tt)
4367                                 }
4368
4369                         case P32: // 32 bit but illegal if 64-bit mode
4370                                 if ctxt.Arch.Family == sys.AMD64 {
4371                                         ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
4372                                 }
4373
4374                         case Py: // 64-bit only, no prefix
4375                                 if ctxt.Arch.Family != sys.AMD64 {
4376                                         ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
4377                                 }
4378
4379                         case Py1: // 64-bit only if z < 1, no prefix
4380                                 if z < 1 && ctxt.Arch.Family != sys.AMD64 {
4381                                         ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
4382                                 }
4383
4384                         case Py3: // 64-bit only if z < 3, no prefix
4385                                 if z < 3 && ctxt.Arch.Family != sys.AMD64 {
4386                                         ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
4387                                 }
4388                         }
4389
4390                         if z >= len(o.op) {
4391                                 log.Fatalf("asmins bad table %v", p)
4392                         }
4393                         op = int(o.op[z])
4394                         if op == 0x0f {
4395                                 ab.Put1(byte(op))
4396                                 z++
4397                                 op = int(o.op[z])
4398                         }
4399
4400                         switch yt.zcase {
4401                         default:
4402                                 ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
4403                                 return
4404
4405                         case Zpseudo:
4406                                 break
4407
4408                         case Zlit:
4409                                 ab.PutOpBytesLit(z, &o.op)
4410
4411                         case Zlitr_m:
4412                                 ab.PutOpBytesLit(z, &o.op)
4413                                 ab.asmand(ctxt, cursym, p, &p.To, &p.From)
4414
4415                         case Zlitm_r:
4416                                 ab.PutOpBytesLit(z, &o.op)
4417                                 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4418
4419                         case Zlit_m_r:
4420                                 ab.PutOpBytesLit(z, &o.op)
4421                                 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
4422
4423                         case Zmb_r:
4424                                 bytereg(&p.From, &p.Ft)
4425                                 fallthrough
4426
4427                         case Zm_r:
4428                                 ab.Put1(byte(op))
4429                                 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4430
4431                         case Z_m_r:
4432                                 ab.Put1(byte(op))
4433                                 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
4434
4435                         case Zm2_r:
4436                                 ab.Put2(byte(op), o.op[z+1])
4437                                 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4438
4439                         case Zm_r_xm:
4440                                 ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
4441                                 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4442
4443                         case Zm_r_xm_nr:
4444                                 ab.rexflag = 0
4445                                 ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
4446                                 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4447
4448                         case Zm_r_i_xm:
4449                                 ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
4450                                 ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
4451                                 ab.Put1(byte(p.To.Offset))
4452
4453                         case Zibm_r, Zibr_m:
4454                                 ab.PutOpBytesLit(z, &o.op)
4455                                 if yt.zcase == Zibr_m {
4456                                         ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
4457                                 } else {
4458                                         ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
4459                                 }
4460                                 switch {
4461                                 default:
4462                                         ab.Put1(byte(p.From.Offset))
4463                                 case yt.args[0] == Yi32 && o.prefix == Pe:
4464                                         ab.PutInt16(int16(p.From.Offset))
4465                                 case yt.args[0] == Yi32:
4466                                         ab.PutInt32(int32(p.From.Offset))
4467                                 }
4468
4469                         case Zaut_r:
4470                                 ab.Put1(0x8d) // leal
4471                                 if p.From.Type != obj.TYPE_ADDR {
4472                                         ctxt.Diag("asmins: Zaut sb type ADDR")
4473                                 }
4474                                 p.From.Type = obj.TYPE_MEM
4475                                 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4476                                 p.From.Type = obj.TYPE_ADDR
4477
4478                         case Zm_o:
4479                                 ab.Put1(byte(op))
4480                                 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
4481
4482                         case Zr_m:
4483                                 ab.Put1(byte(op))
4484                                 ab.asmand(ctxt, cursym, p, &p.To, &p.From)
4485
4486                         case Zvex:
4487                                 ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
4488
4489                         case Zvex_rm_v_r:
4490                                 ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
4491                                 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4492
4493                         case Zvex_rm_v_ro:
4494                                 ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
4495                                 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
4496
4497                         case Zvex_i_rm_vo:
4498                                 ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
4499                                 ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
4500                                 ab.Put1(byte(p.From.Offset))
4501
4502                         case Zvex_i_r_v:
4503                                 ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
4504                                 regnum := byte(0x7)
4505                                 if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
4506                                         regnum &= byte(p.GetFrom3().Reg - REG_X0)
4507                                 } else {
4508                                         regnum &= byte(p.GetFrom3().Reg - REG_Y0)
4509                                 }
4510                                 ab.Put1(o.op[z+2] | regnum)
4511                                 ab.Put1(byte(p.From.Offset))
4512
4513                         case Zvex_i_rm_v_r:
4514                                 imm, from, from3, to := unpackOps4(p)
4515                                 ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
4516                                 ab.asmand(ctxt, cursym, p, from, to)
4517                                 ab.Put1(byte(imm.Offset))
4518
4519                         case Zvex_i_rm_r:
4520                                 ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
4521                                 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
4522                                 ab.Put1(byte(p.From.Offset))
4523
4524                         case Zvex_v_rm_r:
4525                                 ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
4526                                 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
4527
4528                         case Zvex_r_v_rm:
4529                                 ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
4530                                 ab.asmand(ctxt, cursym, p, &p.To, &p.From)
4531
4532                         case Zvex_rm_r_vo:
4533                                 ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
4534                                 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
4535
4536                         case Zvex_i_r_rm:
4537                                 ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
4538                                 ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
4539                                 ab.Put1(byte(p.From.Offset))
4540
4541                         case Zvex_hr_rm_v_r:
4542                                 hr, from, from3, to := unpackOps4(p)
4543                                 ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
4544                                 ab.asmand(ctxt, cursym, p, from, to)
4545                                 ab.Put1(byte(regIndex(hr.Reg) << 4))
4546
4547                         case Zevex_k_rmo:
4548                                 ab.evex = newEVEXBits(z, &o.op)
4549                                 ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
4550                                 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))
4551
4552                         case Zevex_i_rm_vo:
4553                                 ab.evex = newEVEXBits(z, &o.op)
4554                                 ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
4555                                 ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
4556                                 ab.Put1(byte(p.From.Offset))
4557
4558                         case Zevex_i_rm_k_vo:
4559                                 imm, from, kmask, to := unpackOps4(p)
4560                                 ab.evex = newEVEXBits(z, &o.op)
4561                                 ab.asmevex(ctxt, p, from, to, nil, kmask)
4562                                 ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
4563                                 ab.Put1(byte(imm.Offset))
4564
4565                         case Zevex_i_r_rm:
4566                                 ab.evex = newEVEXBits(z, &o.op)
4567                                 ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
4568                                 ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
4569                                 ab.Put1(byte(p.From.Offset))
4570
4571                         case Zevex_i_r_k_rm:
4572                                 imm, from, kmask, to := unpackOps4(p)
4573                                 ab.evex = newEVEXBits(z, &o.op)
4574                                 ab.asmevex(ctxt, p, to, nil, from, kmask)
4575                                 ab.asmand(ctxt, cursym, p, to, from)
4576                                 ab.Put1(byte(imm.Offset))
4577
4578                         case Zevex_i_rm_r:
4579                                 ab.evex = newEVEXBits(z, &o.op)
4580                                 ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
4581                                 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
4582                                 ab.Put1(byte(p.From.Offset))
4583
4584                         case Zevex_i_rm_k_r:
4585                                 imm, from, kmask, to := unpackOps4(p)
4586                                 ab.evex = newEVEXBits(z, &o.op)
4587                                 ab.asmevex(ctxt, p, from, nil, to, kmask)
4588                                 ab.asmand(ctxt, cursym, p, from, to)
4589                                 ab.Put1(byte(imm.Offset))
4590
4591                         case Zevex_i_rm_v_r:
4592                                 imm, from, from3, to := unpackOps4(p)
4593                                 ab.evex = newEVEXBits(z, &o.op)
4594                                 ab.asmevex(ctxt, p, from, from3, to, nil)
4595                                 ab.asmand(ctxt, cursym, p, from, to)
4596                                 ab.Put1(byte(imm.Offset))
4597
4598                         case Zevex_i_rm_v_k_r:
4599                                 imm, from, from3, kmask, to := unpackOps5(p)
4600                                 ab.evex = newEVEXBits(z, &o.op)
4601                                 ab.asmevex(ctxt, p, from, from3, to, kmask)
4602                                 ab.asmand(ctxt, cursym, p, from, to)
4603                                 ab.Put1(byte(imm.Offset))
4604
4605                         case Zevex_r_v_rm:
4606                                 ab.evex = newEVEXBits(z, &o.op)
4607                                 ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
4608                                 ab.asmand(ctxt, cursym, p, &p.To, &p.From)
4609
4610                         case Zevex_rm_v_r:
4611                                 ab.evex = newEVEXBits(z, &o.op)
4612                                 ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
4613                                 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4614
4615                         case Zevex_rm_k_r:
4616                                 ab.evex = newEVEXBits(z, &o.op)
4617                                 ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
4618                                 ab.asmand(ctxt, cursym, p, &p.From, &p.To)
4619
4620                         case Zevex_r_k_rm:
4621                                 ab.evex = newEVEXBits(z, &o.op)
4622                                 ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
4623                                 ab.asmand(ctxt, cursym, p, &p.To, &p.From)
4624
4625                         case Zevex_rm_v_k_r:
4626                                 from, from3, kmask, to := unpackOps4(p)
4627                                 ab.evex = newEVEXBits(z, &o.op)
4628                                 ab.asmevex(ctxt, p, from, from3, to, kmask)
4629                                 ab.asmand(ctxt, cursym, p, from, to)
4630
4631                         case Zevex_r_v_k_rm:
4632                                 from, from3, kmask, to := unpackOps4(p)
4633                                 ab.evex = newEVEXBits(z, &o.op)
4634                                 ab.asmevex(ctxt, p, to, from3, from, kmask)
4635                                 ab.asmand(ctxt, cursym, p, to, from)
4636
4637                         case Zr_m_xm:
4638                                 ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
4639                                 ab.asmand(ctxt, cursym, p, &p.To, &p.From)
4640
4641                         case Zr_m_xm_nr:
4642                                 ab.rexflag = 0
4643                                 ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
4644                                 ab.asmand(ctxt, cursym, p, &p.To, &p.From)
4645
4646                         case Zo_m:
4647                                 ab.Put1(byte(op))
4648                                 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
4649
4650                         case Zcallindreg:
4651                                 r = obj.Addrel(cursym)
4652                                 r.Off = int32(p.Pc)
4653                                 r.Type = objabi.R_CALLIND
4654                                 r.Siz = 0
4655                                 fallthrough
4656
4657                         case Zo_m64:
4658                                 ab.Put1(byte(op))
4659                                 ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
4660
4661                         case Zm_ibo:
4662                                 ab.Put1(byte(op))
4663                                 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
4664                                 ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
4665
4666                         case Zibo_m:
4667                                 ab.Put1(byte(op))
4668                                 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
4669                                 ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
4670
4671                         case Zibo_m_xm:
4672                                 z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
4673                                 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
4674                                 ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
4675
4676                         case Z_ib, Zib_:
4677                                 if yt.zcase == Zib_ {
4678                                         a = &p.From
4679                                 } else {
4680                                         a = &p.To
4681                                 }
4682                                 ab.Put1(byte(op))
4683                                 if p.As == AXABORT {
4684                                         ab.Put1(o.op[z+1])
4685                                 }
4686                                 ab.Put1(byte(vaddr(ctxt, p, a, nil)))
4687
4688                         case Zib_rp:
4689                                 ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
4690                                 ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
4691
4692                         case Zil_rp:
4693                                 ab.rexflag |= regrex[p.To.Reg] & Rxb
4694                                 ab.Put1(byte(op + reg[p.To.Reg]))
4695                                 if o.prefix == Pe {
4696                                         v = vaddr(ctxt, p, &p.From, nil)
4697                                         ab.PutInt16(int16(v))
4698                                 } else {
4699                                         ab.relput4(ctxt, cursym, p, &p.From)
4700                                 }
4701
4702                         case Zo_iw:
4703                                 ab.Put1(byte(op))
4704                                 if p.From.Type != obj.TYPE_NONE {
4705                                         v = vaddr(ctxt, p, &p.From, nil)
4706                                         ab.PutInt16(int16(v))
4707                                 }
4708
4709                         case Ziq_rp:
4710                                 v = vaddr(ctxt, p, &p.From, &rel)
4711                                 l = int(v >> 32)
4712                                 if l == 0 && rel.Siz != 8 {
4713                                         ab.rexflag &^= (0x40 | Rxw)
4714
4715                                         ab.rexflag |= regrex[p.To.Reg] & Rxb
4716                                         ab.Put1(byte(0xb8 + reg[p.To.Reg]))
4717                                         if rel.Type != 0 {
4718                                                 r = obj.Addrel(cursym)
4719                                                 *r = rel
4720                                                 r.Off = int32(p.Pc + int64(ab.Len()))
4721                                         }
4722
4723                                         ab.PutInt32(int32(v))
4724                                 } else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend
4725                                         ab.Put1(0xc7)
4726                                         ab.asmando(ctxt, cursym, p, &p.To, 0)
4727
4728                                         ab.PutInt32(int32(v)) // need all 8
4729                                 } else {
4730                                         ab.rexflag |= regrex[p.To.Reg] & Rxb
4731                                         ab.Put1(byte(op + reg[p.To.Reg]))
4732                                         if rel.Type != 0 {
4733                                                 r = obj.Addrel(cursym)
4734                                                 *r = rel
4735                                                 r.Off = int32(p.Pc + int64(ab.Len()))
4736                                         }
4737
4738                                         ab.PutInt64(v)
4739                                 }
4740
4741                         case Zib_rr:
4742                                 ab.Put1(byte(op))
4743                                 ab.asmand(ctxt, cursym, p, &p.To, &p.To)
4744                                 ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
4745
4746                         case Z_il, Zil_:
4747                                 if yt.zcase == Zil_ {
4748                                         a = &p.From
4749                                 } else {
4750                                         a = &p.To
4751                                 }
4752                                 ab.Put1(byte(op))
4753                                 if o.prefix == Pe {
4754                                         v = vaddr(ctxt, p, a, nil)
4755                                         ab.PutInt16(int16(v))
4756                                 } else {
4757                                         ab.relput4(ctxt, cursym, p, a)
4758                                 }
4759
4760                         case Zm_ilo, Zilo_m:
4761                                 ab.Put1(byte(op))
4762                                 if yt.zcase == Zilo_m {
4763                                         a = &p.From
4764                                         ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
4765                                 } else {
4766                                         a = &p.To
4767                                         ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
4768                                 }
4769
4770                                 if o.prefix == Pe {
4771                                         v = vaddr(ctxt, p, a, nil)
4772                                         ab.PutInt16(int16(v))
4773                                 } else {
4774                                         ab.relput4(ctxt, cursym, p, a)
4775                                 }
4776
4777                         case Zil_rr:
4778                                 ab.Put1(byte(op))
4779                                 ab.asmand(ctxt, cursym, p, &p.To, &p.To)
4780                                 if o.prefix == Pe {
4781                                         v = vaddr(ctxt, p, &p.From, nil)
4782                                         ab.PutInt16(int16(v))
4783                                 } else {
4784                                         ab.relput4(ctxt, cursym, p, &p.From)
4785                                 }
4786
4787                         case Z_rp:
4788                                 ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
4789                                 ab.Put1(byte(op + reg[p.To.Reg]))
4790
4791                         case Zrp_:
4792                                 ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
4793                                 ab.Put1(byte(op + reg[p.From.Reg]))
4794
4795                         case Zcallcon, Zjmpcon:
4796                                 if yt.zcase == Zcallcon {
4797                                         ab.Put1(byte(op))
4798                                 } else {
4799                                         ab.Put1(o.op[z+1])
4800                                 }
4801                                 r = obj.Addrel(cursym)
4802                                 r.Off = int32(p.Pc + int64(ab.Len()))
4803                                 r.Type = objabi.R_PCREL
4804                                 r.Siz = 4
4805                                 r.Add = p.To.Offset
4806                                 ab.PutInt32(0)
4807
4808                         case Zcallind:
4809                                 ab.Put2(byte(op), o.op[z+1])
4810                                 r = obj.Addrel(cursym)
4811                                 r.Off = int32(p.Pc + int64(ab.Len()))
4812                                 if ctxt.Arch.Family == sys.AMD64 {
4813                                         r.Type = objabi.R_PCREL
4814                                 } else {
4815                                         r.Type = objabi.R_ADDR
4816                                 }
4817                                 r.Siz = 4
4818                                 r.Add = p.To.Offset
4819                                 r.Sym = p.To.Sym
4820                                 ab.PutInt32(0)
4821
4822                         case Zcall, Zcallduff:
4823                                 if p.To.Sym == nil {
4824                                         ctxt.Diag("call without target")
4825                                         ctxt.DiagFlush()
4826                                         log.Fatalf("bad code")
4827                                 }
4828
4829                                 if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
4830                                         ctxt.Diag("directly calling duff when dynamically linking Go")
4831                                 }
4832
4833                                 if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
4834                                         // Maintain BP around call, since duffcopy/duffzero can't do it
4835                                         // (the call jumps into the middle of the function).
4836                                         // This makes it possible to see call sites for duffcopy/duffzero in
4837                                         // BP-based profiling tools like Linux perf (which is the
4838                                         // whole point of maintaining frame pointers in Go).
4839                                         // MOVQ BP, -16(SP)
4840                                         // LEAQ -16(SP), BP
4841                                         ab.Put(bpduff1)
4842                                 }
4843                                 ab.Put1(byte(op))
4844                                 r = obj.Addrel(cursym)
4845                                 r.Off = int32(p.Pc + int64(ab.Len()))
4846                                 r.Sym = p.To.Sym
4847                                 r.Add = p.To.Offset
4848                                 r.Type = objabi.R_CALL
4849                                 r.Siz = 4
4850                                 ab.PutInt32(0)
4851
4852                                 if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
4853                                         // Pop BP pushed above.
4854                                         // MOVQ 0(BP), BP
4855                                         ab.Put(bpduff2)
4856                                 }
4857
4858                         // TODO: jump across functions needs reloc
4859                         case Zbr, Zjmp, Zloop:
4860                                 if p.As == AXBEGIN {
4861                                         ab.Put1(byte(op))
4862                                 }
4863                                 if p.To.Sym != nil {
4864                                         if yt.zcase != Zjmp {
4865                                                 ctxt.Diag("branch to ATEXT")
4866                                                 ctxt.DiagFlush()
4867                                                 log.Fatalf("bad code")
4868                                         }
4869
4870                                         ab.Put1(o.op[z+1])
4871                                         r = obj.Addrel(cursym)
4872                                         r.Off = int32(p.Pc + int64(ab.Len()))
4873                                         r.Sym = p.To.Sym
4874                                         // Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
4875                                         // it can point to a trampoline instead of the destination itself.
4876                                         r.Type = objabi.R_CALL
4877                                         r.Siz = 4
4878                                         ab.PutInt32(0)
4879                                         break
4880                                 }
4881
4882                                 // Assumes q is in this function.
4883                                 // TODO: Check in input, preserve in brchain.
4884
4885                                 // Fill in backward jump now.
4886                                 q = p.To.Target()
4887
4888                                 if q == nil {
4889                                         ctxt.Diag("jmp/branch/loop without target")
4890                                         ctxt.DiagFlush()
4891                                         log.Fatalf("bad code")
4892                                 }
4893
4894                                 if p.Back&branchBackwards != 0 {
4895                                         v = q.Pc - (p.Pc + 2)
4896                                         if v >= -128 && p.As != AXBEGIN {
4897                                                 if p.As == AJCXZL {
4898                                                         ab.Put1(0x67)
4899                                                 }
4900                                                 ab.Put2(byte(op), byte(v))
4901                                         } else if yt.zcase == Zloop {
4902                                                 ctxt.Diag("loop too far: %v", p)
4903                                         } else {
4904                                                 v -= 5 - 2
4905                                                 if p.As == AXBEGIN {
4906                                                         v--
4907                                                 }
4908                                                 if yt.zcase == Zbr {
4909                                                         ab.Put1(0x0f)
4910                                                         v--
4911                                                 }
4912
4913                                                 ab.Put1(o.op[z+1])
4914                                                 ab.PutInt32(int32(v))
4915                                         }
4916
4917                                         break
4918                                 }
4919
4920                                 // Annotate target; will fill in later.
4921                                 p.Forwd = q.Rel
4922
4923                                 q.Rel = p
4924                                 if p.Back&branchShort != 0 && p.As != AXBEGIN {
4925                                         if p.As == AJCXZL {
4926                                                 ab.Put1(0x67)
4927                                         }
4928                                         ab.Put2(byte(op), 0)
4929                                 } else if yt.zcase == Zloop {
4930                                         ctxt.Diag("loop too far: %v", p)
4931                                 } else {
4932                                         if yt.zcase == Zbr {
4933                                                 ab.Put1(0x0f)
4934                                         }
4935                                         ab.Put1(o.op[z+1])
4936                                         ab.PutInt32(0)
4937                                 }
4938
4939                         case Zbyte:
4940                                 v = vaddr(ctxt, p, &p.From, &rel)
4941                                 if rel.Siz != 0 {
4942                                         rel.Siz = uint8(op)
4943                                         r = obj.Addrel(cursym)
4944                                         *r = rel
4945                                         r.Off = int32(p.Pc + int64(ab.Len()))
4946                                 }
4947
4948                                 ab.Put1(byte(v))
4949                                 if op > 1 {
4950                                         ab.Put1(byte(v >> 8))
4951                                         if op > 2 {
4952                                                 ab.PutInt16(int16(v >> 16))
4953                                                 if op > 4 {
4954                                                         ab.PutInt32(int32(v >> 32))
4955                                                 }
4956                                         }
4957                                 }
4958                         }
4959
4960                         return
4961                 }
4962         }
4963         f3t = Ynone * Ymax
4964         if p.GetFrom3() != nil {
4965                 f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
4966         }
4967         for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
4968                 var pp obj.Prog
4969                 var t []byte
4970                 if p.As == mo[0].as {
4971                         if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
4972                                 t = mo[0].op[:]
4973                                 switch mo[0].code {
4974                                 default:
4975                                         ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
4976
4977                                 case movLit:
4978                                         for z = 0; t[z] != 0; z++ {
4979                                                 ab.Put1(t[z])
4980                                         }
4981
4982                                 case movRegMem:
4983                                         ab.Put1(t[0])
4984                                         ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))
4985
4986                                 case movMemReg:
4987                                         ab.Put1(t[0])
4988                                         ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))
4989
4990                                 case movRegMem2op: // r,m - 2op
4991                                         ab.Put2(t[0], t[1])
4992                                         ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
4993                                         ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
4994
4995                                 case movMemReg2op:
4996                                         ab.Put2(t[0], t[1])
4997                                         ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
4998                                         ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
4999
5000                                 case movFullPtr:
5001                                         if t[0] != 0 {
5002                                                 ab.Put1(t[0])
5003                                         }
5004                                         switch p.To.Index {
5005                                         default:
5006                                                 goto bad
5007
5008                                         case REG_DS:
5009                                                 ab.Put1(0xc5)
5010
5011                                         case REG_SS:
5012                                                 ab.Put2(0x0f, 0xb2)
5013
5014                                         case REG_ES:
5015                                                 ab.Put1(0xc4)
5016
5017                                         case REG_FS:
5018                                                 ab.Put2(0x0f, 0xb4)
5019
5020                                         case REG_GS:
5021                                                 ab.Put2(0x0f, 0xb5)
5022                                         }
5023
5024                                         ab.asmand(ctxt, cursym, p, &p.From, &p.To)
5025
5026                                 case movDoubleShift:
5027                                         if t[0] == Pw {
5028                                                 if ctxt.Arch.Family != sys.AMD64 {
5029                                                         ctxt.Diag("asmins: illegal 64: %v", p)
5030                                                 }
5031                                                 ab.rexflag |= Pw
5032                                                 t = t[1:]
5033                                         } else if t[0] == Pe {
5034                                                 ab.Put1(Pe)
5035                                                 t = t[1:]
5036                                         }
5037
5038                                         switch p.From.Type {
5039                                         default:
5040                                                 goto bad
5041
5042                                         case obj.TYPE_CONST:
5043                                                 ab.Put2(0x0f, t[0])
5044                                                 ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
5045                                                 ab.Put1(byte(p.From.Offset))
5046
5047                                         case obj.TYPE_REG:
5048                                                 switch p.From.Reg {
5049                                                 default:
5050                                                         goto bad
5051
5052                                                 case REG_CL, REG_CX:
5053                                                         ab.Put2(0x0f, t[1])
5054                                                         ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
5055                                                 }
5056                                         }
5057
5058                                 // NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
5059                                 // where you load the TLS base register into a register and then index off that
5060                                 // register to access the actual TLS variables. Systems that allow direct TLS access
5061                                 // are handled in prefixof above and should not be listed here.
5062                                 case movTLSReg:
5063                                         if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
5064                                                 ctxt.Diag("invalid load of TLS: %v", p)
5065                                         }
5066
5067                                         if ctxt.Arch.Family == sys.I386 {
5068                                                 // NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
5069                                                 // where you load the TLS base register into a register and then index off that
5070                                                 // register to access the actual TLS variables. Systems that allow direct TLS access
5071                                                 // are handled in prefixof above and should not be listed here.
5072                                                 switch ctxt.Headtype {
5073                                                 default:
5074                                                         log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
5075
5076                                                 case objabi.Hlinux, objabi.Hfreebsd:
5077                                                         if ctxt.Flag_shared {
5078                                                                 // Note that this is not generating the same insns as the other cases.
5079                                                                 //     MOV TLS, dst
5080                                                                 // becomes
5081                                                                 //     call __x86.get_pc_thunk.dst
5082                                                                 //     movl (gotpc + g@gotntpoff)(dst), dst
5083                                                                 // which is encoded as
5084                                                                 //     call __x86.get_pc_thunk.dst
5085                                                                 //     movl 0(dst), dst
5086                                                                 // and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
5087                                                                 // is g, which we can't check here, but will when we assemble the second
5088                                                                 // instruction.
5089                                                                 dst := p.To.Reg
5090                                                                 ab.Put1(0xe8)
5091                                                                 r = obj.Addrel(cursym)
5092                                                                 r.Off = int32(p.Pc + int64(ab.Len()))
5093                                                                 r.Type = objabi.R_CALL
5094                                                                 r.Siz = 4
5095                                                                 r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
5096                                                                 ab.PutInt32(0)
5097
5098                                                                 ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
5099                                                                 r = obj.Addrel(cursym)
5100                                                                 r.Off = int32(p.Pc + int64(ab.Len()))
5101                                                                 r.Type = objabi.R_TLS_IE
5102                                                                 r.Siz = 4
5103                                                                 r.Add = 2
5104                                                                 ab.PutInt32(0)
5105                                                         } else {
5106                                                                 // ELF TLS base is 0(GS).
5107                                                                 pp.From = p.From
5108
5109                                                                 pp.From.Type = obj.TYPE_MEM
5110                                                                 pp.From.Reg = REG_GS
5111                                                                 pp.From.Offset = 0
5112                                                                 pp.From.Index = REG_NONE
5113                                                                 pp.From.Scale = 0
5114                                                                 ab.Put2(0x65, // GS
5115                                                                         0x8B)
5116                                                                 ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
5117                                                         }
5118                                                 case objabi.Hplan9:
5119                                                         pp.From = obj.Addr{}
5120                                                         pp.From.Type = obj.TYPE_MEM
5121                                                         pp.From.Name = obj.NAME_EXTERN
5122                                                         pp.From.Sym = plan9privates
5123                                                         pp.From.Offset = 0
5124                                                         pp.From.Index = REG_NONE
5125                                                         ab.Put1(0x8B)
5126                                                         ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
5127
5128                                                 case objabi.Hwindows:
5129                                                         // Windows TLS base is always 0x14(FS).
5130                                                         pp.From = p.From
5131
5132                                                         pp.From.Type = obj.TYPE_MEM
5133                                                         pp.From.Reg = REG_FS
5134                                                         pp.From.Offset = 0x14
5135                                                         pp.From.Index = REG_NONE
5136                                                         pp.From.Scale = 0
5137                                                         ab.Put2(0x64, // FS
5138                                                                 0x8B)
5139                                                         ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
5140                                                 }
5141                                                 break
5142                                         }
5143
5144                                         switch ctxt.Headtype {
5145                                         default:
5146                                                 log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
5147
5148                                         case objabi.Hlinux, objabi.Hfreebsd:
5149                                                 if !ctxt.Flag_shared {
5150                                                         log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
5151                                                 }
5152                                                 // Note that this is not generating the same insn as the other cases.
5153                                                 //     MOV TLS, R_to
5154                                                 // becomes
5155                                                 //     movq g@gottpoff(%rip), R_to
5156                                                 // which is encoded as
5157                                                 //     movq 0(%rip), R_to
5158                                                 // and a R_TLS_IE reloc. This all assumes the only tls variable we access
5159                                                 // is g, which we can't check here, but will when we assemble the second
5160                                                 // instruction.
5161                                                 ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
5162
5163                                                 ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
5164                                                 r = obj.Addrel(cursym)
5165                                                 r.Off = int32(p.Pc + int64(ab.Len()))
5166                                                 r.Type = objabi.R_TLS_IE
5167                                                 r.Siz = 4
5168                                                 r.Add = -4
5169                                                 ab.PutInt32(0)
5170
5171                                         case objabi.Hplan9:
5172                                                 pp.From = obj.Addr{}
5173                                                 pp.From.Type = obj.TYPE_MEM
5174                                                 pp.From.Name = obj.NAME_EXTERN
5175                                                 pp.From.Sym = plan9privates
5176                                                 pp.From.Offset = 0
5177                                                 pp.From.Index = REG_NONE
5178                                                 ab.rexflag |= Pw
5179                                                 ab.Put1(0x8B)
5180                                                 ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
5181
5182                                         case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
5183                                                 // TLS base is 0(FS).
5184                                                 pp.From = p.From
5185
5186                                                 pp.From.Type = obj.TYPE_MEM
5187                                                 pp.From.Name = obj.NAME_NONE
5188                                                 pp.From.Reg = REG_NONE
5189                                                 pp.From.Offset = 0
5190                                                 pp.From.Index = REG_NONE
5191                                                 pp.From.Scale = 0
5192                                                 ab.rexflag |= Pw
5193                                                 ab.Put2(0x64, // FS
5194                                                         0x8B)
5195                                                 ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
5196
5197                                         case objabi.Hwindows:
5198                                                 // Windows TLS base is always 0x28(GS).
5199                                                 pp.From = p.From
5200
5201                                                 pp.From.Type = obj.TYPE_MEM
5202                                                 pp.From.Name = obj.NAME_NONE
5203                                                 pp.From.Reg = REG_GS
5204                                                 pp.From.Offset = 0x28
5205                                                 pp.From.Index = REG_NONE
5206                                                 pp.From.Scale = 0
5207                                                 ab.rexflag |= Pw
5208                                                 ab.Put2(0x65, // GS
5209                                                         0x8B)
5210                                                 ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
5211                                         }
5212                                 }
5213                                 return
5214                         }
5215                 }
5216         }
5217         goto bad
5218
5219 bad:
5220         if ctxt.Arch.Family != sys.AMD64 {
5221                 // here, the assembly has failed.
5222                 // if it's a byte instruction that has
5223                 // unaddressable registers, try to
5224                 // exchange registers and reissue the
5225                 // instruction with the operands renamed.
5226                 pp := *p
5227
5228                 unbytereg(&pp.From, &pp.Ft)
5229                 unbytereg(&pp.To, &pp.Tt)
5230
5231                 z := int(p.From.Reg)
5232                 if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
5233                         // TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
5234                         // For now, different to keep bit-for-bit compatibility.
5235                         if ctxt.Arch.Family == sys.I386 {
5236                                 breg := byteswapreg(ctxt, &p.To)
5237                                 if breg != REG_AX {
5238                                         ab.Put1(0x87) // xchg lhs,bx
5239                                         ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
5240                                         subreg(&pp, z, breg)
5241                                         ab.doasm(ctxt, cursym, &pp)
5242                                         ab.Put1(0x87) // xchg lhs,bx
5243                                         ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
5244                                 } else {
5245                                         ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
5246                                         subreg(&pp, z, REG_AX)
5247                                         ab.doasm(ctxt, cursym, &pp)
5248                                         ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
5249                                 }
5250                                 return
5251                         }
5252
5253                         if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
5254                                 // We certainly don't want to exchange
5255                                 // with AX if the op is MUL or DIV.
5256                                 ab.Put1(0x87) // xchg lhs,bx
5257                                 ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
5258                                 subreg(&pp, z, REG_BX)
5259                                 ab.doasm(ctxt, cursym, &pp)
5260                                 ab.Put1(0x87) // xchg lhs,bx
5261                                 ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
5262                         } else {
5263                                 ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
5264                                 subreg(&pp, z, REG_AX)
5265                                 ab.doasm(ctxt, cursym, &pp)
5266                                 ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
5267                         }
5268                         return
5269                 }
5270
5271                 z = int(p.To.Reg)
5272                 if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
5273                         // TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
5274                         // For now, different to keep bit-for-bit compatibility.
5275                         if ctxt.Arch.Family == sys.I386 {
5276                                 breg := byteswapreg(ctxt, &p.From)
5277                                 if breg != REG_AX {
5278                                         ab.Put1(0x87) //xchg rhs,bx
5279                                         ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
5280                                         subreg(&pp, z, breg)
5281                                         ab.doasm(ctxt, cursym, &pp)
5282                                         ab.Put1(0x87) // xchg rhs,bx
5283                                         ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
5284                                 } else {
5285                                         ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
5286                                         subreg(&pp, z, REG_AX)
5287                                         ab.doasm(ctxt, cursym, &pp)
5288                                         ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
5289                                 }
5290                                 return
5291                         }
5292
5293                         if isax(&p.From) {
5294                                 ab.Put1(0x87) // xchg rhs,bx
5295                                 ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
5296                                 subreg(&pp, z, REG_BX)
5297                                 ab.doasm(ctxt, cursym, &pp)
5298                                 ab.Put1(0x87) // xchg rhs,bx
5299                                 ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
5300                         } else {
5301                                 ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
5302                                 subreg(&pp, z, REG_AX)
5303                                 ab.doasm(ctxt, cursym, &pp)
5304                                 ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
5305                         }
5306                         return
5307                 }
5308         }
5309
5310         ctxt.Diag("%s: invalid instruction: %v", cursym.Name, p)
5311 }
5312
5313 // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
5314 // which is not referenced in a.
5315 // If a is empty, it returns BX to account for MULB-like instructions
5316 // that might use DX and AX.
5317 func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
5318         cana, canb, canc, cand := true, true, true, true
5319         if a.Type == obj.TYPE_NONE {
5320                 cana, cand = false, false
5321         }
5322
5323         if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
5324                 switch a.Reg {
5325                 case REG_NONE:
5326                         cana, cand = false, false
5327                 case REG_AX, REG_AL, REG_AH:
5328                         cana = false
5329                 case REG_BX, REG_BL, REG_BH:
5330                         canb = false
5331                 case REG_CX, REG_CL, REG_CH:
5332                         canc = false
5333                 case REG_DX, REG_DL, REG_DH:
5334                         cand = false
5335                 }
5336         }
5337
5338         if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
5339                 switch a.Index {
5340                 case REG_AX:
5341                         cana = false
5342                 case REG_BX:
5343                         canb = false
5344                 case REG_CX:
5345                         canc = false
5346                 case REG_DX:
5347                         cand = false
5348                 }
5349         }
5350
5351         switch {
5352         case cana:
5353                 return REG_AX
5354         case canb:
5355                 return REG_BX
5356         case canc:
5357                 return REG_CX
5358         case cand:
5359                 return REG_DX
5360         default:
5361                 ctxt.Diag("impossible byte register")
5362                 ctxt.DiagFlush()
5363                 log.Fatalf("bad code")
5364                 return 0
5365         }
5366 }
5367
5368 func isbadbyte(a *obj.Addr) bool {
5369         return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
5370 }
5371
5372 func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
5373         ab.Reset()
5374
5375         ab.rexflag = 0
5376         ab.vexflag = false
5377         ab.evexflag = false
5378         mark := ab.Len()
5379         ab.doasm(ctxt, cursym, p)
5380         if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
5381                 // as befits the whole approach of the architecture,
5382                 // the rex prefix must appear before the first opcode byte
5383                 // (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
5384                 // before the 0f opcode escape!), or it might be ignored.
5385                 // note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
5386                 if ctxt.Arch.Family != sys.AMD64 {
5387                         ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
5388                 }
5389                 n := ab.Len()
5390                 var np int
5391                 for np = mark; np < n; np++ {
5392                         c := ab.At(np)
5393                         if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
5394                                 break
5395                         }
5396                 }
5397                 ab.Insert(np, byte(0x40|ab.rexflag))
5398         }
5399
5400         n := ab.Len()
5401         for i := len(cursym.R) - 1; i >= 0; i-- {
5402                 r := &cursym.R[i]
5403                 if int64(r.Off) < p.Pc {
5404                         break
5405                 }
5406                 if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
5407                         r.Off++
5408                 }
5409                 if r.Type == objabi.R_PCREL {
5410                         if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
5411                                 // PC-relative addressing is relative to the end of the instruction,
5412                                 // but the relocations applied by the linker are relative to the end
5413                                 // of the relocation. Because immediate instruction
5414                                 // arguments can follow the PC-relative memory reference in the
5415                                 // instruction encoding, the two may not coincide. In this case,
5416                                 // adjust addend so that linker can keep relocating relative to the
5417                                 // end of the relocation.
5418                                 r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
5419                         } else if ctxt.Arch.Family == sys.I386 {
5420                                 // On 386 PC-relative addressing (for non-call/jmp instructions)
5421                                 // assumes that the previous instruction loaded the PC of the end
5422                                 // of that instruction into CX, so the adjustment is relative to
5423                                 // that.
5424                                 r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
5425                         }
5426                 }
5427                 if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
5428                         // On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
5429                         r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
5430                 }
5431
5432         }
5433 }
5434
5435 // unpackOps4 extracts 4 operands from p.
5436 func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) {
5437         return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.To
5438 }
5439
5440 // unpackOps5 extracts 5 operands from p.
5441 func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) {
5442         return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.RestArgs[2].Addr, &p.To
5443 }