]> Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/internal/obj/arm64/obj7.go
cmd/compile: Add some CMP and CMN optimization rules on arm64
[gostls13.git] / src / cmd / internal / obj / arm64 / obj7.go
1 // cmd/7l/noop.c, cmd/7l/obj.c, cmd/ld/pass.c from Vita Nuova.
2 // https://code.google.com/p/ken-cc/source/browse/
3 //
4 //      Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
5 //      Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
6 //      Portions Copyright © 1997-1999 Vita Nuova Limited
7 //      Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
8 //      Portions Copyright © 2004,2006 Bruce Ellis
9 //      Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
10 //      Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
11 //      Portions Copyright © 2009 The Go Authors. All rights reserved.
12 //
13 // Permission is hereby granted, free of charge, to any person obtaining a copy
14 // of this software and associated documentation files (the "Software"), to deal
15 // in the Software without restriction, including without limitation the rights
16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 // copies of the Software, and to permit persons to whom the Software is
18 // furnished to do so, subject to the following conditions:
19 //
20 // The above copyright notice and this permission notice shall be included in
21 // all copies or substantial portions of the Software.
22 //
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29 // THE SOFTWARE.
30
31 package arm64
32
33 import (
34         "cmd/internal/obj"
35         "cmd/internal/objabi"
36         "cmd/internal/src"
37         "cmd/internal/sys"
38         "internal/buildcfg"
39         "log"
40         "math"
41 )
42
43 var complements = []obj.As{
44         AADD:  ASUB,
45         AADDW: ASUBW,
46         ASUB:  AADD,
47         ASUBW: AADDW,
48 }
49
50 // zrReplace is the set of instructions for which $0 in the From operand
51 // should be replaced with REGZERO.
52 var zrReplace = map[obj.As]bool{
53         AMOVD:  true,
54         AMOVW:  true,
55         AMOVWU: true,
56         AMOVH:  true,
57         AMOVHU: true,
58         AMOVB:  true,
59         AMOVBU: true,
60         ASBC:   true,
61         ASBCW:  true,
62         ASBCS:  true,
63         ASBCSW: true,
64         AADC:   true,
65         AADCW:  true,
66         AADCS:  true,
67         AADCSW: true,
68         AFMOVD: true,
69         AFMOVS: true,
70         AMSR:   true,
71 }
72
// stacksplit emits the stack-bound check for the current function.
//
// The inline check is appended after p: it loads the stack guard from g,
// compares it against SP (adjusted by framesize for larger frames) and
// branches to a morestack call sequence when the bound is not met. That
// call sequence is appended after the last instruction of the function and
// ends with a jump back to the start of the check.
//
// If c.ctxt.Flag_maymorestack is set, a call to that hook (with register
// arguments spilled, and LR, FP and REGCTXT saved in a temporary 32-byte
// frame) is emitted before the check.
//
// The returned Prog is the instruction produced by EndUnsafePoint after the
// inline check's final conditional branch; the caller continues appending
// the prologue after it.
func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
	if c.ctxt.Flag_maymorestack != "" {
		p = c.cursym.Func().SpillRegisterArgs(p, c.newprog)

		// Save LR and make room for FP, REGCTXT. Leave room
		// for caller's saved FP.
		const frameSize = 32
		p = obj.Appendp(p, c.newprog)
		p.As = AMOVD
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REGLINK
		p.To.Type = obj.TYPE_MEM
		p.Scond = C_XPRE
		p.To.Offset = -frameSize
		p.To.Reg = REGSP
		p.Spadj = frameSize

		// Save FP.
		p = obj.Appendp(p, c.newprog)
		p.As = AMOVD
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REGFP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = REGSP
		p.To.Offset = -8

		// FP = SP - 8, i.e. the address of the slot FP was just stored to.
		p = obj.Appendp(p, c.newprog)
		p.As = ASUB
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 8
		p.Reg = REGSP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REGFP

		// Save REGCTXT (for simplicity we do this whether or
		// not we need it.)
		p = obj.Appendp(p, c.newprog)
		p.As = AMOVD
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REGCTXT
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = REGSP
		p.To.Offset = 8

		// BL maymorestack
		p = obj.Appendp(p, c.newprog)
		p.As = ABL
		p.To.Type = obj.TYPE_BRANCH
		// See ../x86/obj6.go
		p.To.Sym = c.ctxt.LookupABI(c.ctxt.Flag_maymorestack, c.cursym.ABI())

		// Restore REGCTXT.
		p = obj.Appendp(p, c.newprog)
		p.As = AMOVD
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REGSP
		p.From.Offset = 8
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REGCTXT

		// Restore FP.
		p = obj.Appendp(p, c.newprog)
		p.As = AMOVD
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REGSP
		p.From.Offset = -8
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REGFP

		// Restore LR and SP.
		p = obj.Appendp(p, c.newprog)
		p.As = AMOVD
		p.From.Type = obj.TYPE_MEM
		p.Scond = C_XPOST
		p.From.Offset = frameSize
		p.From.Reg = REGSP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REGLINK
		p.Spadj = -frameSize

		p = c.cursym.Func().UnspillRegisterArgs(p, c.newprog)
	}

	// Jump back to here after morestack returns.
	// startPred.Link (the stackguard load appended next) is the target of
	// the final "B start" emitted at the bottom of this function.
	startPred := p

	// MOV  g_stackguard(g), RT1
	p = obj.Appendp(p, c.newprog)

	p.As = AMOVD
	p.From.Type = obj.TYPE_MEM
	p.From.Reg = REGG
	p.From.Offset = 2 * int64(c.ctxt.Arch.PtrSize) // G.stackguard0
	if c.cursym.CFunc() {
		p.From.Offset = 3 * int64(c.ctxt.Arch.PtrSize) // G.stackguard1
	}
	p.To.Type = obj.TYPE_REG
	p.To.Reg = REGRT1

	// Mark the stack bound check and morestack call async nonpreemptible.
	// If we get preempted here, when resumed the preemption request is
	// cleared, but we'll still call morestack, which will double the stack
	// unnecessarily. See issue #35470.
	p = c.ctxt.StartUnsafePoint(p, c.newprog)

	// q is the extra underflow branch (BLO); it is only created for the
	// largest frame class and stays nil otherwise.
	q := (*obj.Prog)(nil)
	if framesize <= objabi.StackSmall {
		// small stack: SP < stackguard
		//      CMP     stackguard, SP

		p = obj.Appendp(p, c.newprog)
		p.As = ACMP
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REGRT1
		p.Reg = REGSP
	} else if framesize <= objabi.StackBig {
		// large stack: SP-framesize < stackguard-StackSmall
		//      SUB     $(framesize-StackSmall), SP, RT2
		//      CMP     stackguard, RT2
		p = obj.Appendp(p, c.newprog)

		p.As = ASUB
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(framesize) - objabi.StackSmall
		p.Reg = REGSP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REGRT2

		p = obj.Appendp(p, c.newprog)
		p.As = ACMP
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REGRT1
		p.Reg = REGRT2
	} else {
		// Such a large stack we need to protect against underflow.
		// The runtime guarantees SP > objabi.StackBig, but
		// framesize is large enough that SP-framesize may
		// underflow, causing a direct comparison with the
		// stack guard to incorrectly succeed. We explicitly
		// guard against underflow.
		//
		//      SUBS    $(framesize-StackSmall), SP, RT2
		//      // On underflow, jump to morestack
		//      BLO     label_of_call_to_morestack
		//      CMP     stackguard, RT2

		p = obj.Appendp(p, c.newprog)
		p.As = ASUBS
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(framesize) - objabi.StackSmall
		p.Reg = REGSP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REGRT2

		// The branch target is filled in below, once the morestack
		// sequence exists.
		p = obj.Appendp(p, c.newprog)
		q = p
		p.As = ABLO
		p.To.Type = obj.TYPE_BRANCH

		p = obj.Appendp(p, c.newprog)
		p.As = ACMP
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REGRT1
		p.Reg = REGRT2
	}

	// BLS  do-morestack
	bls := obj.Appendp(p, c.newprog)
	bls.As = ABLS
	bls.To.Type = obj.TYPE_BRANCH

	// end closes the unsafe-point region of the inline check and is the
	// value returned to the caller.
	end := c.ctxt.EndUnsafePoint(bls, c.newprog, -1)

	// Walk to the last instruction of the function; the morestack call
	// sequence is appended after it.
	var last *obj.Prog
	for last = c.cursym.Func().Text; last.Link != nil; last = last.Link {
	}

	// Now we are at the end of the function, but logically
	// we are still in function prologue. We need to fix the
	// SP data and PCDATA.
	spfix := obj.Appendp(last, c.newprog)
	spfix.As = obj.ANOP
	spfix.Spadj = -framesize

	pcdata := c.ctxt.EmitEntryStackMap(c.cursym, spfix, c.newprog)
	pcdata = c.ctxt.StartUnsafePoint(pcdata, c.newprog)

	// Both conditional branches of the inline check land on the start of
	// the morestack sequence.
	if q != nil {
		q.To.SetTarget(pcdata)
	}
	bls.To.SetTarget(pcdata)

	spill := c.cursym.Func().SpillRegisterArgs(pcdata, c.newprog)

	// MOV  LR, R3
	movlr := obj.Appendp(spill, c.newprog)
	movlr.As = AMOVD
	movlr.From.Type = obj.TYPE_REG
	movlr.From.Reg = REGLINK
	movlr.To.Type = obj.TYPE_REG
	movlr.To.Reg = REG_R3

	// Disabled debugging aid: when enabled, it loads framesize into REGTMP
	// before the morestack call.
	debug := movlr
	if false {
		debug = obj.Appendp(debug, c.newprog)
		debug.As = AMOVD
		debug.From.Type = obj.TYPE_CONST
		debug.From.Offset = int64(framesize)
		debug.To.Type = obj.TYPE_REG
		debug.To.Reg = REGTMP
	}

	// BL   runtime.morestack(SB)
	call := obj.Appendp(debug, c.newprog)
	call.As = ABL
	call.To.Type = obj.TYPE_BRANCH
	// Pick the morestack variant: morestackc for CFunc symbols,
	// morestack_noctxt when the function does not need the context
	// register, plain morestack otherwise.
	morestack := "runtime.morestack"
	switch {
	case c.cursym.CFunc():
		morestack = "runtime.morestackc"
	case !c.cursym.Func().Text.From.Sym.NeedCtxt():
		morestack = "runtime.morestack_noctxt"
	}
	call.To.Sym = c.ctxt.Lookup(morestack)

	unspill := c.cursym.Func().UnspillRegisterArgs(call, c.newprog)
	pcdata = c.ctxt.EndUnsafePoint(unspill, c.newprog, -1)

	// B    start
	jmp := obj.Appendp(pcdata, c.newprog)
	jmp.As = AB
	jmp.To.Type = obj.TYPE_BRANCH
	jmp.To.SetTarget(startPred.Link)
	// Undo the -framesize adjustment recorded on spfix above.
	jmp.Spadj = +framesize

	return end
}
310
311 func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
312         c := ctxt7{ctxt: ctxt, newprog: newprog}
313
314         p.From.Class = 0
315         p.To.Class = 0
316
317         // Previously we rewrote $0 to ZR, but we have now removed this change.
318         // In order to be compatible with some previous legal instruction formats,
319         // reserve the previous conversion for some specific instructions.
320         if p.From.Type == obj.TYPE_CONST && p.From.Offset == 0 && zrReplace[p.As] {
321                 p.From.Type = obj.TYPE_REG
322                 p.From.Reg = REGZERO
323         }
324
325         // Rewrite BR/BL to symbol as TYPE_BRANCH.
326         switch p.As {
327         case AB,
328                 ABL,
329                 obj.ARET,
330                 obj.ADUFFZERO,
331                 obj.ADUFFCOPY:
332                 if p.To.Sym != nil {
333                         p.To.Type = obj.TYPE_BRANCH
334                 }
335                 break
336         }
337
338         // Rewrite float constants to values stored in memory.
339         switch p.As {
340         case AFMOVS:
341                 if p.From.Type == obj.TYPE_FCONST {
342                         f64 := p.From.Val.(float64)
343                         f32 := float32(f64)
344                         if c.chipfloat7(f64) > 0 {
345                                 break
346                         }
347                         if math.Float32bits(f32) == 0 {
348                                 p.From.Type = obj.TYPE_REG
349                                 p.From.Reg = REGZERO
350                                 break
351                         }
352                         p.From.Type = obj.TYPE_MEM
353                         p.From.Sym = c.ctxt.Float32Sym(f32)
354                         p.From.Name = obj.NAME_EXTERN
355                         p.From.Offset = 0
356                 }
357
358         case AFMOVD:
359                 if p.From.Type == obj.TYPE_FCONST {
360                         f64 := p.From.Val.(float64)
361                         if c.chipfloat7(f64) > 0 {
362                                 break
363                         }
364                         if math.Float64bits(f64) == 0 {
365                                 p.From.Type = obj.TYPE_REG
366                                 p.From.Reg = REGZERO
367                                 break
368                         }
369                         p.From.Type = obj.TYPE_MEM
370                         p.From.Sym = c.ctxt.Float64Sym(f64)
371                         p.From.Name = obj.NAME_EXTERN
372                         p.From.Offset = 0
373                 }
374
375                 break
376         }
377
378         // Rewrite negative immediates as positive immediates with
379         // complementary instruction.
380         switch p.As {
381         case AADD, ASUB:
382                 if p.From.Type == obj.TYPE_CONST && p.From.Offset < 0 && p.From.Offset != -1<<63 {
383                         p.From.Offset = -p.From.Offset
384                         p.As = complements[p.As]
385                 }
386         case AADDW, ASUBW:
387                 if p.From.Type == obj.TYPE_CONST && p.From.Offset < 0 && int32(p.From.Offset) != -1<<31 {
388                         p.From.Offset = -p.From.Offset
389                         p.As = complements[p.As]
390                 }
391         }
392
393         if c.ctxt.Flag_dynlink {
394                 c.rewriteToUseGot(p)
395         }
396 }
397
398 // Rewrite p, if necessary, to access global data via the global offset table.
399 func (c *ctxt7) rewriteToUseGot(p *obj.Prog) {
400         if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO {
401                 //     ADUFFxxx $offset
402                 // becomes
403                 //     MOVD runtime.duffxxx@GOT, REGTMP
404                 //     ADD $offset, REGTMP
405                 //     CALL REGTMP
406                 var sym *obj.LSym
407                 if p.As == obj.ADUFFZERO {
408                         sym = c.ctxt.LookupABI("runtime.duffzero", obj.ABIInternal)
409                 } else {
410                         sym = c.ctxt.LookupABI("runtime.duffcopy", obj.ABIInternal)
411                 }
412                 offset := p.To.Offset
413                 p.As = AMOVD
414                 p.From.Type = obj.TYPE_MEM
415                 p.From.Name = obj.NAME_GOTREF
416                 p.From.Sym = sym
417                 p.To.Type = obj.TYPE_REG
418                 p.To.Reg = REGTMP
419                 p.To.Name = obj.NAME_NONE
420                 p.To.Offset = 0
421                 p.To.Sym = nil
422                 p1 := obj.Appendp(p, c.newprog)
423                 p1.As = AADD
424                 p1.From.Type = obj.TYPE_CONST
425                 p1.From.Offset = offset
426                 p1.To.Type = obj.TYPE_REG
427                 p1.To.Reg = REGTMP
428                 p2 := obj.Appendp(p1, c.newprog)
429                 p2.As = obj.ACALL
430                 p2.To.Type = obj.TYPE_REG
431                 p2.To.Reg = REGTMP
432         }
433
434         // We only care about global data: NAME_EXTERN means a global
435         // symbol in the Go sense, and p.Sym.Local is true for a few
436         // internally defined symbols.
437         if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
438                 // MOVD $sym, Rx becomes MOVD sym@GOT, Rx
439                 // MOVD $sym+<off>, Rx becomes MOVD sym@GOT, Rx; ADD <off>, Rx
440                 if p.As != AMOVD {
441                         c.ctxt.Diag("do not know how to handle TYPE_ADDR in %v with -dynlink", p)
442                 }
443                 if p.To.Type != obj.TYPE_REG {
444                         c.ctxt.Diag("do not know how to handle LEAQ-type insn to non-register in %v with -dynlink", p)
445                 }
446                 p.From.Type = obj.TYPE_MEM
447                 p.From.Name = obj.NAME_GOTREF
448                 if p.From.Offset != 0 {
449                         q := obj.Appendp(p, c.newprog)
450                         q.As = AADD
451                         q.From.Type = obj.TYPE_CONST
452                         q.From.Offset = p.From.Offset
453                         q.To = p.To
454                         p.From.Offset = 0
455                 }
456         }
457         if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN {
458                 c.ctxt.Diag("don't know how to handle %v with -dynlink", p)
459         }
460         var source *obj.Addr
461         // MOVx sym, Ry becomes MOVD sym@GOT, REGTMP; MOVx (REGTMP), Ry
462         // MOVx Ry, sym becomes MOVD sym@GOT, REGTMP; MOVD Ry, (REGTMP)
463         // An addition may be inserted between the two MOVs if there is an offset.
464         if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
465                 if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
466                         c.ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p)
467                 }
468                 source = &p.From
469         } else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
470                 source = &p.To
471         } else {
472                 return
473         }
474         if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP {
475                 return
476         }
477         if source.Sym.Type == objabi.STLSBSS {
478                 return
479         }
480         if source.Type != obj.TYPE_MEM {
481                 c.ctxt.Diag("don't know how to handle %v with -dynlink", p)
482         }
483         p1 := obj.Appendp(p, c.newprog)
484         p2 := obj.Appendp(p1, c.newprog)
485         p1.As = AMOVD
486         p1.From.Type = obj.TYPE_MEM
487         p1.From.Sym = source.Sym
488         p1.From.Name = obj.NAME_GOTREF
489         p1.To.Type = obj.TYPE_REG
490         p1.To.Reg = REGTMP
491
492         p2.As = p.As
493         p2.From = p.From
494         p2.To = p.To
495         if p.From.Name == obj.NAME_EXTERN {
496                 p2.From.Reg = REGTMP
497                 p2.From.Name = obj.NAME_NONE
498                 p2.From.Sym = nil
499         } else if p.To.Name == obj.NAME_EXTERN {
500                 p2.To.Reg = REGTMP
501                 p2.To.Name = obj.NAME_NONE
502                 p2.To.Sym = nil
503         } else {
504                 return
505         }
506         obj.Nopout(p)
507 }
508
509 func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
510         if cursym.Func().Text == nil || cursym.Func().Text.Link == nil {
511                 return
512         }
513
514         c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym}
515
516         p := c.cursym.Func().Text
517         textstksiz := p.To.Offset
518         if textstksiz == -8 {
519                 // Historical way to mark NOFRAME.
520                 p.From.Sym.Set(obj.AttrNoFrame, true)
521                 textstksiz = 0
522         }
523         if textstksiz < 0 {
524                 c.ctxt.Diag("negative frame size %d - did you mean NOFRAME?", textstksiz)
525         }
526         if p.From.Sym.NoFrame() {
527                 if textstksiz != 0 {
528                         c.ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", textstksiz)
529                 }
530         }
531
532         c.cursym.Func().Args = p.To.Val.(int32)
533         c.cursym.Func().Locals = int32(textstksiz)
534
535         /*
536          * find leaf subroutines
537          */
538         for p := c.cursym.Func().Text; p != nil; p = p.Link {
539                 switch p.As {
540                 case obj.ATEXT:
541                         p.Mark |= LEAF
542
543                 case ABL,
544                         obj.ADUFFZERO,
545                         obj.ADUFFCOPY:
546                         c.cursym.Func().Text.Mark &^= LEAF
547                 }
548         }
549
550         var q *obj.Prog
551         var q1 *obj.Prog
552         var retjmp *obj.LSym
553         for p := c.cursym.Func().Text; p != nil; p = p.Link {
554                 o := p.As
555                 switch o {
556                 case obj.ATEXT:
557                         c.cursym.Func().Text = p
558                         c.autosize = int32(textstksiz)
559
560                         if p.Mark&LEAF != 0 && c.autosize == 0 {
561                                 // A leaf function with no locals has no frame.
562                                 p.From.Sym.Set(obj.AttrNoFrame, true)
563                         }
564
565                         if !p.From.Sym.NoFrame() {
566                                 // If there is a stack frame at all, it includes
567                                 // space to save the LR.
568                                 c.autosize += 8
569                         }
570
571                         if c.autosize != 0 {
572                                 extrasize := int32(0)
573                                 if c.autosize%16 == 8 {
574                                         // Allocate extra 8 bytes on the frame top to save FP
575                                         extrasize = 8
576                                 } else if c.autosize&(16-1) == 0 {
577                                         // Allocate extra 16 bytes to save FP for the old frame whose size is 8 mod 16
578                                         extrasize = 16
579                                 } else {
580                                         c.ctxt.Diag("%v: unaligned frame size %d - must be 16 aligned", p, c.autosize-8)
581                                 }
582                                 c.autosize += extrasize
583                                 c.cursym.Func().Locals += extrasize
584
585                                 // low 32 bits for autosize
586                                 // high 32 bits for extrasize
587                                 p.To.Offset = int64(c.autosize) | int64(extrasize)<<32
588                         } else {
589                                 // NOFRAME
590                                 p.To.Offset = 0
591                         }
592
593                         if c.autosize == 0 && c.cursym.Func().Text.Mark&LEAF == 0 {
594                                 if c.ctxt.Debugvlog {
595                                         c.ctxt.Logf("save suppressed in: %s\n", c.cursym.Func().Text.From.Sym.Name)
596                                 }
597                                 c.cursym.Func().Text.Mark |= LEAF
598                         }
599
600                         if cursym.Func().Text.Mark&LEAF != 0 {
601                                 cursym.Set(obj.AttrLeaf, true)
602                                 if p.From.Sym.NoFrame() {
603                                         break
604                                 }
605                         }
606
607                         if p.Mark&LEAF != 0 && c.autosize < objabi.StackSmall {
608                                 // A leaf function with a small stack can be marked
609                                 // NOSPLIT, avoiding a stack check.
610                                 p.From.Sym.Set(obj.AttrNoSplit, true)
611                         }
612
613                         if !p.From.Sym.NoSplit() {
614                                 p = c.stacksplit(p, c.autosize) // emit split check
615                         }
616
617                         var prologueEnd *obj.Prog
618
619                         aoffset := c.autosize
620                         if aoffset > 0xf0 {
621                                 // MOVD.W offset variant range is -0x100 to 0xf8, SP should be 16-byte aligned.
622                                 // so the maximum aoffset value is 0xf0.
623                                 aoffset = 0xf0
624                         }
625
626                         // Frame is non-empty. Make sure to save link register, even if
627                         // it is a leaf function, so that traceback works.
628                         q = p
629                         if c.autosize > aoffset {
630                                 // Frame size is too large for a MOVD.W instruction. Store the frame pointer
631                                 // register and link register before decrementing SP, so if a signal comes
632                                 // during the execution of the function prologue, the traceback code will
633                                 // not see a half-updated stack frame.
634
635                                 // SUB $autosize, RSP, R20
636                                 q1 = obj.Appendp(q, c.newprog)
637                                 q1.Pos = p.Pos
638                                 q1.As = ASUB
639                                 q1.From.Type = obj.TYPE_CONST
640                                 q1.From.Offset = int64(c.autosize)
641                                 q1.Reg = REGSP
642                                 q1.To.Type = obj.TYPE_REG
643                                 q1.To.Reg = REG_R20
644
645                                 prologueEnd = q1
646
647                                 // STP (R29, R30), -8(R20)
648                                 q1 = obj.Appendp(q1, c.newprog)
649                                 q1.Pos = p.Pos
650                                 q1.As = ASTP
651                                 q1.From.Type = obj.TYPE_REGREG
652                                 q1.From.Reg = REGFP
653                                 q1.From.Offset = REGLINK
654                                 q1.To.Type = obj.TYPE_MEM
655                                 q1.To.Reg = REG_R20
656                                 q1.To.Offset = -8
657
658                                 // This is not async preemptible, as if we open a frame
659                                 // at the current SP, it will clobber the saved LR.
660                                 q1 = c.ctxt.StartUnsafePoint(q1, c.newprog)
661
662                                 // MOVD R20, RSP
663                                 q1 = obj.Appendp(q1, c.newprog)
664                                 q1.Pos = p.Pos
665                                 q1.As = AMOVD
666                                 q1.From.Type = obj.TYPE_REG
667                                 q1.From.Reg = REG_R20
668                                 q1.To.Type = obj.TYPE_REG
669                                 q1.To.Reg = REGSP
670                                 q1.Spadj = c.autosize
671
672                                 q1 = c.ctxt.EndUnsafePoint(q1, c.newprog, -1)
673
674                                 if buildcfg.GOOS == "ios" {
675                                         // iOS does not support SA_ONSTACK. We will run the signal handler
676                                         // on the G stack. If we write below SP, it may be clobbered by
677                                         // the signal handler. So we save FP and LR after decrementing SP.
678                                         // STP (R29, R30), -8(RSP)
679                                         q1 = obj.Appendp(q1, c.newprog)
680                                         q1.Pos = p.Pos
681                                         q1.As = ASTP
682                                         q1.From.Type = obj.TYPE_REGREG
683                                         q1.From.Reg = REGFP
684                                         q1.From.Offset = REGLINK
685                                         q1.To.Type = obj.TYPE_MEM
686                                         q1.To.Reg = REGSP
687                                         q1.To.Offset = -8
688                                 }
689                         } else {
690                                 // small frame, update SP and save LR in a single MOVD.W instruction.
691                                 // So if a signal comes during the execution of the function prologue,
692                                 // the traceback code will not see a half-updated stack frame.
693                                 // Also, on Linux, in a cgo binary we may get a SIGSETXID signal
694                                 // early on before the signal stack is set, as glibc doesn't allow
695                                 // us to block SIGSETXID. So it is important that we don't write below
696                                 // the SP until the signal stack is set.
697                                 // Luckily, all the functions from thread entry to setting the signal
698                                 // stack have small frames.
699                                 q1 = obj.Appendp(q, c.newprog)
700                                 q1.As = AMOVD
701                                 q1.Pos = p.Pos
702                                 q1.From.Type = obj.TYPE_REG
703                                 q1.From.Reg = REGLINK
704                                 q1.To.Type = obj.TYPE_MEM
705                                 q1.Scond = C_XPRE
706                                 q1.To.Offset = int64(-aoffset)
707                                 q1.To.Reg = REGSP
708                                 q1.Spadj = aoffset
709
710                                 prologueEnd = q1
711
712                                 // Frame pointer.
713                                 q1 = obj.Appendp(q1, c.newprog)
714                                 q1.Pos = p.Pos
715                                 q1.As = AMOVD
716                                 q1.From.Type = obj.TYPE_REG
717                                 q1.From.Reg = REGFP
718                                 q1.To.Type = obj.TYPE_MEM
719                                 q1.To.Reg = REGSP
720                                 q1.To.Offset = -8
721                         }
722
723                         prologueEnd.Pos = prologueEnd.Pos.WithXlogue(src.PosPrologueEnd)
724
725                         q1 = obj.Appendp(q1, c.newprog)
726                         q1.Pos = p.Pos
727                         q1.As = ASUB
728                         q1.From.Type = obj.TYPE_CONST
729                         q1.From.Offset = 8
730                         q1.Reg = REGSP
731                         q1.To.Type = obj.TYPE_REG
732                         q1.To.Reg = REGFP
733
734                         if c.cursym.Func().Text.From.Sym.Wrapper() {
735                                 // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
736                                 //
737                                 //      MOV  g_panic(g), RT1
738                                 //      CBNZ checkargp
739                                 // end:
740                                 //      NOP
741                                 // ... function body ...
742                                 // checkargp:
743                                 //      MOV  panic_argp(RT1), RT2
744                                 //      ADD  $(autosize+8), RSP, R20
745                                 //      CMP  RT2, R20
746                                 //      BNE  end
747                                 //      ADD  $8, RSP, R20
748                                 //      MOVD R20, panic_argp(RT1)
749                                 //      B    end
750                                 //
751                                 // The NOP is needed to give the jumps somewhere to land.
752                                 // It is a liblink NOP, not an ARM64 NOP: it encodes to 0 instruction bytes.
753                                 q = q1
754
755                                 // MOV g_panic(g), RT1
756                                 q = obj.Appendp(q, c.newprog)
757                                 q.As = AMOVD
758                                 q.From.Type = obj.TYPE_MEM
759                                 q.From.Reg = REGG
760                                 q.From.Offset = 4 * int64(c.ctxt.Arch.PtrSize) // G.panic
761                                 q.To.Type = obj.TYPE_REG
762                                 q.To.Reg = REGRT1
763
764                                 // CBNZ RT1, checkargp
765                                 cbnz := obj.Appendp(q, c.newprog)
766                                 cbnz.As = ACBNZ
767                                 cbnz.From.Type = obj.TYPE_REG
768                                 cbnz.From.Reg = REGRT1
769                                 cbnz.To.Type = obj.TYPE_BRANCH
770
771                                 // Empty branch target at the top of the function body
772                                 end := obj.Appendp(cbnz, c.newprog)
773                                 end.As = obj.ANOP
774
775                                 // find the end of the function
776                                 var last *obj.Prog
777                                 for last = end; last.Link != nil; last = last.Link {
778                                 }
779
780                                 // MOV panic_argp(RT1), RT2
781                                 mov := obj.Appendp(last, c.newprog)
782                                 mov.As = AMOVD
783                                 mov.From.Type = obj.TYPE_MEM
784                                 mov.From.Reg = REGRT1
785                                 mov.From.Offset = 0 // Panic.argp
786                                 mov.To.Type = obj.TYPE_REG
787                                 mov.To.Reg = REGRT2
788
789                                 // CBNZ branches to the MOV above
790                                 cbnz.To.SetTarget(mov)
791
792                                 // ADD $(autosize+8), SP, R20
793                                 q = obj.Appendp(mov, c.newprog)
794                                 q.As = AADD
795                                 q.From.Type = obj.TYPE_CONST
796                                 q.From.Offset = int64(c.autosize) + 8
797                                 q.Reg = REGSP
798                                 q.To.Type = obj.TYPE_REG
799                                 q.To.Reg = REG_R20
800
801                                 // CMP RT2, R20
802                                 q = obj.Appendp(q, c.newprog)
803                                 q.As = ACMP
804                                 q.From.Type = obj.TYPE_REG
805                                 q.From.Reg = REGRT2
806                                 q.Reg = REG_R20
807
808                                 // BNE end
809                                 q = obj.Appendp(q, c.newprog)
810                                 q.As = ABNE
811                                 q.To.Type = obj.TYPE_BRANCH
812                                 q.To.SetTarget(end)
813
814                                 // ADD $8, SP, R20
815                                 q = obj.Appendp(q, c.newprog)
816                                 q.As = AADD
817                                 q.From.Type = obj.TYPE_CONST
818                                 q.From.Offset = 8
819                                 q.Reg = REGSP
820                                 q.To.Type = obj.TYPE_REG
821                                 q.To.Reg = REG_R20
822
823                                 // MOV R20, panic_argp(RT1)
824                                 q = obj.Appendp(q, c.newprog)
825                                 q.As = AMOVD
826                                 q.From.Type = obj.TYPE_REG
827                                 q.From.Reg = REG_R20
828                                 q.To.Type = obj.TYPE_MEM
829                                 q.To.Reg = REGRT1
830                                 q.To.Offset = 0 // Panic.argp
831
832                                 // B end
833                                 q = obj.Appendp(q, c.newprog)
834                                 q.As = AB
835                                 q.To.Type = obj.TYPE_BRANCH
836                                 q.To.SetTarget(end)
837                         }
838
839                 case obj.ARET:
840                         nocache(p)
841                         if p.From.Type == obj.TYPE_CONST {
842                                 c.ctxt.Diag("using BECOME (%v) is not supported!", p)
843                                 break
844                         }
845
846                         retjmp = p.To.Sym
847                         p.To = obj.Addr{}
848                         if c.cursym.Func().Text.Mark&LEAF != 0 {
849                                 if c.autosize != 0 {
850                                         p.As = AADD
851                                         p.From.Type = obj.TYPE_CONST
852                                         p.From.Offset = int64(c.autosize)
853                                         p.To.Type = obj.TYPE_REG
854                                         p.To.Reg = REGSP
855                                         p.Spadj = -c.autosize
856
857                                         // Frame pointer.
858                                         p = obj.Appendp(p, c.newprog)
859                                         p.As = ASUB
860                                         p.From.Type = obj.TYPE_CONST
861                                         p.From.Offset = 8
862                                         p.Reg = REGSP
863                                         p.To.Type = obj.TYPE_REG
864                                         p.To.Reg = REGFP
865                                 }
866                         } else {
867                                 aoffset := c.autosize
868                                 // LDP -8(RSP), (R29, R30)
869                                 p.As = ALDP
870                                 p.From.Type = obj.TYPE_MEM
871                                 p.From.Offset = -8
872                                 p.From.Reg = REGSP
873                                 p.To.Type = obj.TYPE_REGREG
874                                 p.To.Reg = REGFP
875                                 p.To.Offset = REGLINK
876
877                                 // ADD $aoffset, RSP, RSP
878                                 q = newprog()
879                                 q.As = AADD
880                                 q.From.Type = obj.TYPE_CONST
881                                 q.From.Offset = int64(aoffset)
882                                 q.To.Type = obj.TYPE_REG
883                                 q.To.Reg = REGSP
884                                 q.Spadj = -aoffset
885                                 q.Pos = p.Pos
886                                 q.Link = p.Link
887                                 p.Link = q
888                                 p = q
889                         }
890
891                         // If enabled, this code emits 'MOV PC, R27' before every 'MOV LR, PC',
892                         // so that if you are debugging a low-level crash where PC and LR are zero,
893                         // you can look at R27 to see what jumped to the zero.
894                         // This is useful when bringing up Go on a new system.
895                         // (There is similar code in ../ppc64/obj9.go:/if.false.)
896                         const debugRETZERO = false
897                         if debugRETZERO {
898                                 if p.As != obj.ARET {
899                                         q = newprog()
900                                         q.Pos = p.Pos
901                                         q.Link = p.Link
902                                         p.Link = q
903                                         p = q
904                                 }
905                                 p.As = AADR
906                                 p.From.Type = obj.TYPE_BRANCH
907                                 p.From.Offset = 0
908                                 p.To.Type = obj.TYPE_REG
909                                 p.To.Reg = REGTMP
910
911                         }
912
913                         if p.As != obj.ARET {
914                                 q = newprog()
915                                 q.Pos = p.Pos
916                                 q.Link = p.Link
917                                 p.Link = q
918                                 p = q
919                         }
920
921                         if retjmp != nil { // retjmp
922                                 p.As = AB
923                                 p.To.Type = obj.TYPE_BRANCH
924                                 p.To.Sym = retjmp
925                                 p.Spadj = +c.autosize
926                                 break
927                         }
928
929                         p.As = obj.ARET
930                         p.To.Type = obj.TYPE_MEM
931                         p.To.Offset = 0
932                         p.To.Reg = REGLINK
933                         p.Spadj = +c.autosize
934
935                 case AADD, ASUB:
936                         if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.From.Type == obj.TYPE_CONST {
937                                 if p.As == AADD {
938                                         p.Spadj = int32(-p.From.Offset)
939                                 } else {
940                                         p.Spadj = int32(+p.From.Offset)
941                                 }
942                         }
943
944                 case obj.AGETCALLERPC:
945                         if cursym.Leaf() {
946                                 /* MOVD LR, Rd */
947                                 p.As = AMOVD
948                                 p.From.Type = obj.TYPE_REG
949                                 p.From.Reg = REGLINK
950                         } else {
951                                 /* MOVD (RSP), Rd */
952                                 p.As = AMOVD
953                                 p.From.Type = obj.TYPE_MEM
954                                 p.From.Reg = REGSP
955                         }
956
957                 case obj.ADUFFCOPY:
958                         //  ADR ret_addr, R27
959                         //  STP (FP, R27), -24(SP)
960                         //  SUB 24, SP, FP
961                         //  DUFFCOPY
962                         // ret_addr:
963                         //  SUB 8, SP, FP
964
965                         q1 := p
966                         // copy DUFFCOPY from q1 to q4
967                         q4 := obj.Appendp(p, c.newprog)
968                         q4.Pos = p.Pos
969                         q4.As = obj.ADUFFCOPY
970                         q4.To = p.To
971
972                         q1.As = AADR
973                         q1.From.Type = obj.TYPE_BRANCH
974                         q1.To.Type = obj.TYPE_REG
975                         q1.To.Reg = REG_R27
976
977                         q2 := obj.Appendp(q1, c.newprog)
978                         q2.Pos = p.Pos
979                         q2.As = ASTP
980                         q2.From.Type = obj.TYPE_REGREG
981                         q2.From.Reg = REGFP
982                         q2.From.Offset = int64(REG_R27)
983                         q2.To.Type = obj.TYPE_MEM
984                         q2.To.Reg = REGSP
985                         q2.To.Offset = -24
986
987                         // maintain FP for DUFFCOPY
988                         q3 := obj.Appendp(q2, c.newprog)
989                         q3.Pos = p.Pos
990                         q3.As = ASUB
991                         q3.From.Type = obj.TYPE_CONST
992                         q3.From.Offset = 24
993                         q3.Reg = REGSP
994                         q3.To.Type = obj.TYPE_REG
995                         q3.To.Reg = REGFP
996
997                         q5 := obj.Appendp(q4, c.newprog)
998                         q5.Pos = p.Pos
999                         q5.As = ASUB
1000                         q5.From.Type = obj.TYPE_CONST
1001                         q5.From.Offset = 8
1002                         q5.Reg = REGSP
1003                         q5.To.Type = obj.TYPE_REG
1004                         q5.To.Reg = REGFP
1005                         q1.From.SetTarget(q5)
1006                         p = q5
1007
1008                 case obj.ADUFFZERO:
1009                         //  ADR ret_addr, R27
1010                         //  STP (FP, R27), -24(SP)
1011                         //  SUB 24, SP, FP
1012                         //  DUFFZERO
1013                         // ret_addr:
1014                         //  SUB 8, SP, FP
1015
1016                         q1 := p
1017                         // copy DUFFZERO from q1 to q4
1018                         q4 := obj.Appendp(p, c.newprog)
1019                         q4.Pos = p.Pos
1020                         q4.As = obj.ADUFFZERO
1021                         q4.To = p.To
1022
1023                         q1.As = AADR
1024                         q1.From.Type = obj.TYPE_BRANCH
1025                         q1.To.Type = obj.TYPE_REG
1026                         q1.To.Reg = REG_R27
1027
1028                         q2 := obj.Appendp(q1, c.newprog)
1029                         q2.Pos = p.Pos
1030                         q2.As = ASTP
1031                         q2.From.Type = obj.TYPE_REGREG
1032                         q2.From.Reg = REGFP
1033                         q2.From.Offset = int64(REG_R27)
1034                         q2.To.Type = obj.TYPE_MEM
1035                         q2.To.Reg = REGSP
1036                         q2.To.Offset = -24
1037
1038                         // maintain FP for DUFFZERO
1039                         q3 := obj.Appendp(q2, c.newprog)
1040                         q3.Pos = p.Pos
1041                         q3.As = ASUB
1042                         q3.From.Type = obj.TYPE_CONST
1043                         q3.From.Offset = 24
1044                         q3.Reg = REGSP
1045                         q3.To.Type = obj.TYPE_REG
1046                         q3.To.Reg = REGFP
1047
1048                         q5 := obj.Appendp(q4, c.newprog)
1049                         q5.Pos = p.Pos
1050                         q5.As = ASUB
1051                         q5.From.Type = obj.TYPE_CONST
1052                         q5.From.Offset = 8
1053                         q5.Reg = REGSP
1054                         q5.To.Type = obj.TYPE_REG
1055                         q5.To.Reg = REGFP
1056                         q1.From.SetTarget(q5)
1057                         p = q5
1058                 }
1059
1060                 if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.Spadj == 0 {
1061                         f := c.cursym.Func()
1062                         if f.FuncFlag&objabi.FuncFlag_SPWRITE == 0 {
1063                                 c.cursym.Func().FuncFlag |= objabi.FuncFlag_SPWRITE
1064                                 if ctxt.Debugvlog || !ctxt.IsAsm {
1065                                         ctxt.Logf("auto-SPWRITE: %s %v\n", c.cursym.Name, p)
1066                                         if !ctxt.IsAsm {
1067                                                 ctxt.Diag("invalid auto-SPWRITE in non-assembly")
1068                                                 ctxt.DiagFlush()
1069                                                 log.Fatalf("bad SPWRITE")
1070                                         }
1071                                 }
1072                         }
1073                 }
1074                 if p.From.Type == obj.TYPE_SHIFT && (p.To.Reg == REG_RSP || p.Reg == REG_RSP) {
1075                         offset := p.From.Offset
1076                         op := offset & (3 << 22)
1077                         if op != SHIFT_LL {
1078                                 ctxt.Diag("illegal combination: %v", p)
1079                         }
1080                         r := (offset >> 16) & 31
1081                         shift := (offset >> 10) & 63
1082                         if shift > 4 {
1083                                 // the shift amount is out of range, in order to avoid repeated error
1084                                 // reportings, don't call ctxt.Diag, because asmout case 27 has the
1085                                 // same check.
1086                                 shift = 7
1087                         }
1088                         p.From.Type = obj.TYPE_REG
1089                         p.From.Reg = int16(REG_LSL + r + (shift&7)<<5)
1090                         p.From.Offset = 0
1091                 }
1092         }
1093 }
1094
1095 func nocache(p *obj.Prog) {
1096         p.Optab = 0
1097         p.From.Class = 0
1098         p.To.Class = 0
1099 }
1100
1101 var unaryDst = map[obj.As]bool{
1102         AWORD:  true,
1103         ADWORD: true,
1104         ABL:    true,
1105         AB:     true,
1106         ACLREX: true,
1107 }
1108
1109 var Linkarm64 = obj.LinkArch{
1110         Arch:           sys.ArchARM64,
1111         Init:           buildop,
1112         Preprocess:     preprocess,
1113         Assemble:       span7,
1114         Progedit:       progedit,
1115         UnaryDst:       unaryDst,
1116         DWARFRegisters: ARM64DWARFRegisters,
1117 }