// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
8 "cmd/compile/internal/base"
9 "cmd/compile/internal/types"
// memcombine combines smaller loads and stores into larger ones.
// We ensure this generates good code for encoding/binary operations.
// It may help other cases also.
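//
// For example (an illustrative sketch, not code from this package), a
// little-endian 32-bit read assembled by hand from byte loads, such as
//
//	x := uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
//
// can be rewritten into a single 4-byte load (plus a byte swap on
// big-endian targets), and the symmetric store pattern into a single
// 4-byte store.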
func memcombine(f *Func) {
	// This optimization requires that the architecture has
	// unaligned loads and unaligned stores.
	if !f.Config.unalignedOK {

func memcombineLoads(f *Func) {
	// Find "OR trees" to start with.
	mark := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(mark)

	// Mark all values that are the argument of an OR.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Op == OpOr16 || v.Op == OpOr32 || v.Op == OpOr64 {
				mark.add(v.Args[0].ID)
				mark.add(v.Args[1].ID)
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Op != OpOr16 && v.Op != OpOr32 && v.Op != OpOr64 {
			if mark.contains(v.ID) {
				// marked - means it is not the root of an OR tree
			// Add the OR tree rooted at v to the order.
			// We use BFS here, but any walk that puts roots before leaves would work.
			order = append(order, v)
			for ; i < len(order); i++ {
				for j := 0; j < 2; j++ {
					if a.Op == OpOr16 || a.Op == OpOr32 || a.Op == OpOr64 {
						order = append(order, a)
		for _, v := range order {
			max := f.Config.RegSize
			for n := max; n > 1; n /= 2 {
				if combineLoads(v, n) {
// A BaseAddress represents the address ptr+idx, where
// ptr is a pointer type and idx is an integer type.
// idx may be nil, in which case it is treated as 0.
type BaseAddress struct {

// splitPtr returns the base address of ptr and any
// constant offset from that base.
// BaseAddress{ptr,nil},0 is always a valid result, but splitPtr
// tries to peel away as many constants into off as possible.
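//
// For example (an illustrative SSA shape, not output from a real
// compilation), given the address
//
//	(OffPtr [8] (AddPtr p (Add64 (Const64 [4]) i)))
//
// splitPtr would return BaseAddress{ptr: p, idx: i} and an offset of 12.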
func splitPtr(ptr *Value) (BaseAddress, int64) {
		if ptr.Op == OpOffPtr {
		} else if ptr.Op == OpAddPtr {
				// We have two or more indexing values.
				// Pick the first one we found.
				return BaseAddress{ptr: ptr, idx: idx}, off
			if idx.Op == OpAdd32 || idx.Op == OpAdd64 {
				if idx.Args[0].Op == OpConst32 || idx.Args[0].Op == OpConst64 {
					off += idx.Args[0].AuxInt
				} else if idx.Args[1].Op == OpConst32 || idx.Args[1].Op == OpConst64 {
					off += idx.Args[1].AuxInt
			return BaseAddress{ptr: ptr, idx: idx}, off
func combineLoads(root *Value, n int64) bool {
	// Find n values that are ORed together with the above op.
	a := make([]*Value, 0, 8)
	for i := 0; i < len(a) && int64(len(a)) < n; i++ {
		if v.Uses != 1 && v != root {
			// Something in this subtree is used somewhere else.
			a = append(a, v.Args[1])
	if int64(len(a)) != n {
	// Check the first entry to see what ops we're looking for.
	// All the entries should be of the form shift(extend(load)), maybe with no shift.
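	// For example, with orOp == OpOr32 a typical entry has the shape
	// (an illustrative sketch, not literal SSA output):
	//
	//	(Lsh32x64 (ZeroExt8to32 (Load <uint8> ptr mem)) (Const64 [8]))
	//
	// or just (ZeroExt8to32 (Load <uint8> ptr mem)) for the unshifted byte.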
	if orOp == OpOr64 && (v.Op == OpZeroExt8to64 || v.Op == OpZeroExt16to64 || v.Op == OpZeroExt32to64) ||
		orOp == OpOr32 && (v.Op == OpZeroExt8to32 || v.Op == OpZeroExt16to32) ||
		orOp == OpOr16 && v.Op == OpZeroExt8to16 {
	base, _ := splitPtr(v.Args[0])
	size := v.Type.Size()

	if root.Block.Func.Config.arch == "S390X" {
		// s390x can't handle unaligned accesses to global variables.
		if base.ptr.Op == OpAddr {

	// Check all the entries, extract useful info.
	type LoadRecord struct {
		offset int64 // offset of load address from base
	r := make([]LoadRecord, n, 8)
	for i := int64(0); i < n; i++ {
			if v.Args[1].Op != OpConst64 {
			shift = v.Args[1].AuxInt
		if load.Op != OpLoad {
		if load.Args[1] != mem {
		p, off := splitPtr(load.Args[0])
		r[i] = LoadRecord{load: load, offset: off, shift: shift}
	// Sort in memory address order.
	sort.Slice(r, func(i, j int) bool {
		return r[i].offset < r[j].offset

	// Check that we have contiguous offsets.
	for i := int64(0); i < n; i++ {
		if r[i].offset != r[0].offset+i*size {

	// Check for reads in little-endian or big-endian order.
	isLittleEndian := true
	for i := int64(0); i < n; i++ {
		if r[i].shift != shift0+i*size*8 {
			isLittleEndian = false
	for i := int64(0); i < n; i++ {
		if r[i].shift != shift0-i*size*8 {
	if !isLittleEndian && !isBigEndian {
	// Find a place to put the new load.
	// This is tricky, because it has to be at a point where
	// its memory argument is live. We can't just put it in root.Block.
	// We use the block of the latest load.
	loads := make([]*Value, n, 8)
	for i := int64(0); i < n; i++ {
	loadBlock := mergePoint(root.Block, loads...)
	if loadBlock == nil {

	// Find a source position to use.
	for _, load := range loads {
		if load.Block == loadBlock {
	if pos == src.NoXPos {
	// Check to see if we need a byte swap after loading.
	needSwap := isLittleEndian && root.Block.Func.Config.BigEndian ||
		isBigEndian && !root.Block.Func.Config.BigEndian
	if needSwap && (size != 1 || !root.Block.Func.Config.haveByteSwap(n)) {
	// This is the commit point.

	// First, issue load at lowest address.
	v = loadBlock.NewValue2(pos, OpLoad, sizeType(n*size), r[0].load.Args[0], mem)
	// Byte swap if needed.
		v = byteSwap(loadBlock, pos, v)
	if n*size < root.Type.Size() {
		v = zeroExtend(loadBlock, pos, v, n*size, root.Type.Size())

	if isLittleEndian && shift0 != 0 {
		v = leftShift(loadBlock, pos, v, shift0)
	if isBigEndian && shift0-(n-1)*size*8 != 0 {
		v = leftShift(loadBlock, pos, v, shift0-(n-1)*size*8)
	// Install with (Copy v).

	// Clobber the loads, just to prevent additional work being done on
	// subtrees (which are now unreachable).
	for i := int64(0); i < n; i++ {
func memcombineStores(f *Func) {
	mark := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(mark)

	for _, b := range f.Blocks {
		// Mark all stores which are not last in a store sequence.
		for _, v := range b.Values {
				mark.add(v.MemoryArg().ID)

		// pick an order for visiting stores such that
		// later stores come earlier in the ordering.
		for _, v := range b.Values {
			if mark.contains(v.ID) {
				continue // not last in a chain of stores
				order = append(order, v)
				if v.Block != b || v.Op != OpStore {

		// Look for combining opportunities at each store in queue order.
		for _, v := range order {
			if v.Op != OpStore { // already rewritten

			size := v.Aux.(*types.Type).Size()
			if size >= f.Config.RegSize || size == 0 {

			for n := f.Config.RegSize / size; n > 1; n /= 2 {
				if combineStores(v, n) {
// Try to combine the n stores ending in root.
// Returns true if successful.
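//
// For example (illustrative Go source, assuming a little-endian target),
// four byte stores that spread a 32-bit value x across p[0..3],
//
//	p[0] = byte(x)
//	p[1] = byte(x >> 8)
//	p[2] = byte(x >> 16)
//	p[3] = byte(x >> 24)
//
// can be replaced by a single 4-byte store of x (with a byte swap first
// on big-endian targets that have one available).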
func combineStores(root *Value, n int64) bool {
	type StoreRecord struct {
	getShiftBase := func(a []StoreRecord) *Value {
		x := a[0].store.Args[1]
		y := a[1].store.Args[1]
		case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
		case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
		case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
		case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
			// a shift of x and x itself.
			// a shift of y and y itself.
			// 2 shifts both of the same argument.
	isShiftBase := func(v, base *Value) bool {
		case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
		case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
	shift := func(v, base *Value) int64 {
		case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
		case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
		if val.Op != OpConst64 {
	// Element size of the individual stores.
	size := root.Aux.(*types.Type).Size()
	if size*n > root.Block.Func.Config.RegSize {

	// Gather n stores to look at. Check easy conditions we require.
	a := make([]StoreRecord, 0, 8)
	rbase, roff := splitPtr(root.Args[0])
	if root.Block.Func.Config.arch == "S390X" {
		// s390x can't handle unaligned accesses to global variables.
		if rbase.ptr.Op == OpAddr {
	a = append(a, StoreRecord{root, roff})
	for i, x := int64(1), root.Args[2]; i < n; i, x = i+1, x.Args[2] {
		if x.Block != root.Block {
		if x.Uses != 1 { // Note: root can have more than one use.
		if x.Aux.(*types.Type).Size() != size {
			// TODO: the constant source and consecutive load source cases
			// do not need all the stores to be the same size.
		base, off := splitPtr(x.Args[0])
		a = append(a, StoreRecord{x, off})
	// Before we sort, grab the memory arg the result should have.
	mem := a[n-1].store.Args[2]

	// Sort stores in increasing address order.
	sort.Slice(a, func(i, j int) bool {
		return a[i].offset < a[j].offset

	// Check that everything is written to sequential locations.
	for i := int64(0); i < n; i++ {
		if a[i].offset != a[0].offset+i*size {

	// Memory location we're going to write at (the lowest one).
	ptr := a[0].store.Args[0]

	// Check for constant stores
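	// For example (an illustrative case): four 1-byte stores of the
	// constants 0x01, 0x02, 0x03, 0x04 to p, p+1, p+2, p+3 can become a
	// single 4-byte store of the constant 0x04030201 on a little-endian
	// target (0x01020304 on a big-endian target).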
	for i := int64(0); i < n; i++ {
		switch a[i].store.Args[1].Op {
		case OpConst32, OpConst16, OpConst8:

		// Modify root to do all the stores.
		mask := int64(1)<<(8*size) - 1
		for i := int64(0); i < n; i++ {
			s := 8 * size * int64(i)
			if root.Block.Func.Config.BigEndian {
			c |= (a[i].store.Args[1].AuxInt & mask) << s
			cv = root.Block.Func.ConstInt16(types.Types[types.TUINT16], int16(c))
			cv = root.Block.Func.ConstInt32(types.Types[types.TUINT32], int32(c))
			cv = root.Block.Func.ConstInt64(types.Types[types.TUINT64], c)

		// Move all the stores to the root.
		for i := int64(0); i < n; i++ {
				v.Aux = cv.Type // widen store type
				v.Type = types.Types[types.TBOOL] // erase memory type
	// Check for consecutive loads as the source of the stores.
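	// For example (illustrative): a byte-by-byte copy such as
	//
	//	dst[0] = src[0]
	//	dst[1] = src[1]
	//	dst[2] = src[2]
	//	dst[3] = src[3]
	//
	// can become one 4-byte load from src feeding one 4-byte store to dst;
	// no byte swap is needed since a plain copy is endian-neutral.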
	var loadBase BaseAddress
	for i := int64(0); i < n; i++ {
		load := a[i].store.Args[1]
		if load.Op != OpLoad {
		if load.Type.IsPtr() {
			// Don't combine stores containing a pointer, as we need
			// a write barrier for those. This can't currently happen,
			// but might in the future if we ever have another
			// 8-byte-reg/4-byte-ptr architecture like amd64p32.
		base, idx := splitPtr(load.Args[0])
			// First one we found
		if base != loadBase || mem != loadMem {
		if idx != loadIdx+(a[i].offset-a[0].offset) {

		// Modify the first load to do a larger load instead.
		load := a[0].store.Args[1]
			load.Type = types.Types[types.TUINT16]
			load.Type = types.Types[types.TUINT32]
			load.Type = types.Types[types.TUINT64]

		// Modify root to do the store.
		for i := int64(0); i < n; i++ {
				v.Aux = load.Type // widen store type
				v.Type = types.Types[types.TBOOL] // erase memory type
	// Check that all the shift/trunc are of the same base value.
	shiftBase := getShiftBase(a)
	if shiftBase == nil {
	for i := int64(0); i < n; i++ {
		if !isShiftBase(a[i].store, shiftBase) {

	// Check for writes in little-endian or big-endian order.
	isLittleEndian := true
	shift0 := shift(a[0].store, shiftBase)
	for i := int64(1); i < n; i++ {
		if shift(a[i].store, shiftBase) != shift0+i*8 {
			isLittleEndian = false
	for i := int64(1); i < n; i++ {
		if shift(a[i].store, shiftBase) != shift0-i*8 {
	if !isLittleEndian && !isBigEndian {

	// Check to see if we need byte swap before storing.
	needSwap := isLittleEndian && root.Block.Func.Config.BigEndian ||
		isBigEndian && !root.Block.Func.Config.BigEndian
	if needSwap && (size != 1 || !root.Block.Func.Config.haveByteSwap(n)) {
	// This is the commit point.

	// Modify root to do all the stores.
	if isLittleEndian && shift0 != 0 {
		sv = rightShift(root.Block, root.Pos, sv, shift0)
	if isBigEndian && shift0-(n-1)*8 != 0 {
		sv = rightShift(root.Block, root.Pos, sv, shift0-(n-1)*8)
	if sv.Type.Size() > size*n {
		sv = truncate(root.Block, root.Pos, sv, sv.Type.Size(), size*n)
		sv = byteSwap(root.Block, root.Pos, sv)

	// Move all the stores to the root.
	for i := int64(0); i < n; i++ {
			v.Aux = sv.Type // widen store type
			v.Type = types.Types[types.TBOOL] // erase memory type
func sizeType(size int64) *types.Type {
		return types.Types[types.TUINT64]
		return types.Types[types.TUINT32]
		return types.Types[types.TUINT16]
		base.Fatalf("bad size %d\n", size)

func truncate(b *Block, pos src.XPos, v *Value, from, to int64) *Value {
	switch from*10 + to {
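	// Cases are keyed by the from/to sizes in bytes packed as a two-digit
	// number: for example, truncating 8 bytes down to 2 is case 82.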
		return b.NewValue1(pos, OpTrunc64to16, types.Types[types.TUINT16], v)
		return b.NewValue1(pos, OpTrunc64to32, types.Types[types.TUINT32], v)
		return b.NewValue1(pos, OpTrunc32to16, types.Types[types.TUINT16], v)
		base.Fatalf("bad sizes %d %d\n", from, to)

func zeroExtend(b *Block, pos src.XPos, v *Value, from, to int64) *Value {
	switch from*10 + to {
		return b.NewValue1(pos, OpZeroExt16to32, types.Types[types.TUINT32], v)
		return b.NewValue1(pos, OpZeroExt16to64, types.Types[types.TUINT64], v)
		return b.NewValue1(pos, OpZeroExt32to64, types.Types[types.TUINT64], v)
		base.Fatalf("bad sizes %d %d\n", from, to)

func leftShift(b *Block, pos src.XPos, v *Value, shift int64) *Value {
	s := b.Func.ConstInt64(types.Types[types.TUINT64], shift)
	size := v.Type.Size()
		return b.NewValue2(pos, OpLsh64x64, v.Type, v, s)
		return b.NewValue2(pos, OpLsh32x64, v.Type, v, s)
		return b.NewValue2(pos, OpLsh16x64, v.Type, v, s)
		base.Fatalf("bad size %d\n", size)
func rightShift(b *Block, pos src.XPos, v *Value, shift int64) *Value {
	s := b.Func.ConstInt64(types.Types[types.TUINT64], shift)
	size := v.Type.Size()
		return b.NewValue2(pos, OpRsh64Ux64, v.Type, v, s)
		return b.NewValue2(pos, OpRsh32Ux64, v.Type, v, s)
		return b.NewValue2(pos, OpRsh16Ux64, v.Type, v, s)
		base.Fatalf("bad size %d\n", size)

func byteSwap(b *Block, pos src.XPos, v *Value) *Value {
	switch v.Type.Size() {
		return b.NewValue1(pos, OpBswap64, v.Type, v)
		return b.NewValue1(pos, OpBswap32, v.Type, v)
		return b.NewValue1(pos, OpBswap16, v.Type, v)
	v.Fatalf("bad size %d\n", v.Type.Size())