1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Writing and reading of Go object files.
7 // Originally, Go object files were Plan 9 object files, but no longer.
8 // Now they are more like standard object files, in that each symbol is defined
9 // by an associated memory image (bytes) and a list of relocations to apply
10 // during linking. We do not (yet?) use a standard file format, however.
11 // For now, the format is chosen to be as simple as possible to read and write.
12 // It may change for reasons of efficiency, or we may even switch to a
13 // standard file format if there are compelling benefits to doing so.
14 // See golang.org/s/go13linker for more background.
16 // The file format is:
18 // - magic header: "\x00\x00go13ld"
19 // - byte 1 - version number
20 // - sequence of strings giving dependencies (imported packages)
21 // - empty string (marks end of sequence)
22 // - sequence of defined symbols
23 // - byte 0xff (marks end of sequence)
24 // - magic footer: "\xff\xffgo13ld"
26 // All integers are stored in a zigzag varint format.
27 // See golang.org/s/go12symtab for a definition.
29 // Data blocks and strings are both stored as an integer
30 // followed by that many bytes.
32 // A symbol reference is a string name followed by a version.
33 // An empty name corresponds to a nil LSym* pointer.
35 // Each symbol is laid out as the following fields (taken from LSym*):
37 // - byte 0xfe (sanity check for synchronization)
44 // - gotype [symbol reference]
47 // - r [nr relocations, sorted by off]
49 // If type == STEXT, there are a few more fields:
58 // - local [nlocal automatics]
59 // - pcln [pcln table]
61 // Each relocation has the encoding:
68 // - sym [symbol reference]
69 // - xsym [symbol reference]
71 // Each local has the encoding:
73 // - asym [symbol reference]
76 // - gotype [symbol reference]
78 // The pcln table has the encoding:
80 // - pcsp [data block]
81 // - pcfile [data block]
82 // - pcline [data block]
84 // - pcdata [npcdata data blocks]
86 // - funcdata [nfuncdata symbol references]
87 // - funcdataoff [nfuncdata ints]
89 // - file [nfile symbol references]
91 // The file layout and meaning of type integers are architecture-independent.
93 // TODO(rsc): The file format is good for a first pass but needs work.
94 // - There are SymIDs in the object file that should really just be strings.
95 // - The actual symbol memory images are interlaced with the symbol
96 // metadata. They should be separated, to reduce the I/O required to
97 // load just the metadata.
98 // - The symbol references should be shortened, either with a symbol
99 // table or by using a simple backward index to an earlier mentioned symbol.
105 #include "../cmd/ld/textflag.h"
106 #include "../runtime/funcdata.h"
108 static void writesym(Link*, Biobuf*, LSym*);
109 static void wrint(Biobuf*, int64);
110 static void wrstring(Biobuf*, char*);
111 static void wrpath(Link *, Biobuf*, char*);
112 static void wrdata(Biobuf*, void*, int);
113 static void wrsym(Biobuf*, LSym*);
114 static void wrpathsym(Link *ctxt, Biobuf *b, LSym *s);
116 static void readsym(Link*, Biobuf*, char*, char*);
117 static int64 rdint(Biobuf*);
118 static char *rdstring(Biobuf*);
119 static void rddata(Biobuf*, uchar**, int*);
120 static LSym *rdsym(Link*, Biobuf*, char*);
122 void writeobjdirect(Link *ctxt, Biobuf *b);
124 void writeobjgo1(Link*, char*);
125 void writeobjgo2(Link*, char*, int64);
127 extern char *outfile;
// writeobj writes the object file for ctxt to b. It reads $GOOBJ to choose
// between the C writer (writeobjdirect) and the Go-subprocess writers
// (writeobjgo1, writeobjgo2); the modes are spelled out in the comment below.
130 writeobj(Link *ctxt, Biobuf *b)
135 // If $GOOBJ > 0, invoke the Go version of the liblink
136 // output routines via a subprocess.
137 // If $GOOBJ == 1, copy that subprocess's output to
138 // the actual output file.
139 // If $GOOBJ >= 2, generate output using the usual C version
140 // but then check that the subprocess wrote the same bytes.
141 // $GOOBJ is a temporary setting for the transition to a
142 // Go liblink back end. Once the C liblink back ends are deleted,
143 // we will hard code the GOOBJ=1 behavior.
144 env = getenv("GOOBJ");
// C writer only.
// NOTE(review): presumably the case where $GOOBJ is unset or zero - confirm against the guard.
148 writeobjdirect(ctxt, b);
// writeobjgo1 is handed the global outfile rather than b.
// NOTE(review): presumably the $GOOBJ == 1 case described above - confirm.
154 writeobjgo1(ctxt, outfile);
// C writer first, then writeobjgo2 receives outfile and start.
// NOTE(review): presumably the $GOOBJ >= 2 comparison case described above - confirm.
156 writeobjdirect(ctxt, b);
159 writeobjgo2(ctxt, outfile, start);
163 // The Go and C compilers, and the assembler, call writeobj to write
164 // out a Go object file. The linker does not call this; the linker
165 // does not write out object files.
// writeobjdirect is the C implementation of the object writer. In order, it:
//   1. walks every instruction reachable from ctxt->plist and sorts it onto
//      the text/data symbol lists (text, data),
//   2. makes FUNCDATA_ArgsPointerMaps entries refer to "<name>.args_stackmap",
//   3. runs the architecture's follow, preprocess and assemble passes over each
//      text symbol,
//   4. writes the version byte, names taken from ctxt->hist, and then every
//      text and data symbol (via writesym) to b.
// Inconsistent input (a symbol listed twice, a duplicate TEXT) is fatal via sysfatal.
167 writeobjdirect(Link *ctxt, Biobuf *b)
171 LSym *s, *text, *etext, *curtext, *data, *edata;
176 // Build list of symbols, and assign instructions to lists.
177 // Ignore ctxt->plist boundaries. There are no guarantees there,
178 // and the C compilers and assemblers just use one big list.
184 for(pl = ctxt->plist; pl != nil; pl = pl->link) {
// The step expression uses plink, not p->link.
// NOTE(review): presumably plink is captured before p is moved onto a symbol's own list - confirm.
185 for(p = pl->firstpc; p != nil; p = plink) {
186 if(ctxt->debugasm && ctxt->debugvlog)
187 print("obj: %P\n", p);
195 // Assume each TYPE instruction describes
196 // a different local variable or parameter,
198 // Using only the TYPE instructions means
199 // that we discard location information about local variables
200 // in C and assembly functions; that information is inferred
201 // from ordinary references, because there are no TYPE
202 // instructions there. Without the type information, gdb can't
203 // use the locations, so we don't bother to save them.
204 // If something else could use them, we could arrange to
// Describe the variable named by p->from in a fresh zeroed record and link it
// in front of curtext's current autom head.
208 a = emallocz(sizeof *a);
209 a->asym = p->from.sym;
210 a->aoffset = p->from.offset;
211 a->name = p->from.name;
212 a->gotype = p->from.gotype;
213 a->link = curtext->autom;
// AGLOBL: the symbol's size comes from p->to.offset and its flags from
// p->from3.offset; a symbol seen twice is reported and is fatal.
218 if(p->as == AGLOBL) {
221 print("duplicate %P\n", p);
223 sysfatal("symbol %s listed multiple times", s->name);
230 s->size = p->to.offset;
231 if(s->type == 0 || s->type == SXREF)
233 flag = p->from3.offset;
238 else if(flag & NOPTR)
245 savedata(ctxt, p->from.sym, p, "<input>");
257 sysfatal("duplicate TEXT for %s", s->name);
259 sysfatal("symbol %s listed multiple times", s->name);
266 flag = p->from3.offset;
279 if(p->as == AFUNCDATA) {
280 // Rewrite reference to go_args_stackmap(SB) to the Go-provided declaration information.
281 if(curtext == nil) // func _() {}
283 if(strcmp(p->to.sym->name, "go_args_stackmap") == 0) {
// go_args_stackmap is only meaningful as the operand of an ArgsPointerMaps
// FUNCDATA; any other use is diagnosed, and the operand is retargeted to the
// per-function "<curtext>.args_stackmap" symbol.
284 if(p->from.type != TYPE_CONST || p->from.offset != FUNCDATA_ArgsPointerMaps)
285 ctxt->diag("FUNCDATA use of go_args_stackmap(SB) without FUNCDATA_ArgsPointerMaps");
286 p->to.sym = linklookup(ctxt, smprint("%s.args_stackmap", curtext->name), curtext->version);
298 // Add reference to Go arguments for C or assembly functions without them.
299 for(s = text; s != nil; s = s->next) {
// The test below looks at whether the name starts with the three bytes `"".`.
// NOTE(review): presumably symbols without that prefix are skipped - confirm.
300 if(strncmp(s->name, "\"\".", 3) != 0)
// Scan the function for an existing ArgsPointerMaps FUNCDATA.
303 for(p = s->text; p != nil; p = p->link) {
304 if(p->as == AFUNCDATA && p->from.type == TYPE_CONST && p->from.offset == FUNCDATA_ArgsPointerMaps) {
// Append an instruction whose operands are the ArgsPointerMaps constant and
// the external symbol "<name>.args_stackmap".
// NOTE(review): presumably reached only when the scan above found none - confirm.
310 p = appendp(ctxt, s->text);
312 p->from.type = TYPE_CONST;
313 p->from.offset = FUNCDATA_ArgsPointerMaps;
314 p->to.type = TYPE_MEM;
315 p->to.name = NAME_EXTERN;
316 p->to.sym = linklookup(ctxt, smprint("%s.args_stackmap", s->name), s->version);
320 // Turn functions into machine code images.
321 for(s = text; s != nil; s = s->next) {
// Architecture back-end passes, always in this order.
324 ctxt->arch->follow(ctxt, s);
325 ctxt->arch->preprocess(ctxt, s);
326 ctxt->arch->assemble(ctxt, s);
// The reader (ldobjfile) rejects any version byte other than 1.
334 Bputc(b, 1); // version
337 for(h = ctxt->hist; h != nil; h = h->link)
339 wrstring(b, h->name);
// Text symbols are written before data symbols.
343 for(s = text; s != nil; s = s->next)
344 writesym(ctxt, b, s);
345 for(s = data; s != nil; s = s->next)
346 writesym(ctxt, b, s);
// writesym serializes the symbol s into b using the per-symbol layout described
// at the top of this file. The Bprint calls that come first produce a
// human-readable listing of the symbol on ctxt->bso (attributes, instructions,
// a hex/ASCII dump of s->p, and relocations).
// NOTE(review): the listing is presumably emitted only when a debug flag is set - confirm.
355 writesym(Link *ctxt, Biobuf *b, LSym *s)
365 Bprint(ctxt->bso, "%s ", s->name);
367 Bprint(ctxt->bso, "v=%d ", s->version);
369 Bprint(ctxt->bso, "t=%d ", s->type);
371 Bprint(ctxt->bso, "dupok ");
373 Bprint(ctxt->bso, "cfunc ");
375 Bprint(ctxt->bso, "nosplit ");
376 Bprint(ctxt->bso, "size=%lld value=%lld", (vlong)s->size, (vlong)s->value);
377 if(s->type == STEXT) {
378 Bprint(ctxt->bso, " args=%#llux locals=%#llux", (uvlong)s->args, (uvlong)s->locals);
380 Bprint(ctxt->bso, " leaf");
382 Bprint(ctxt->bso, "\n");
// One line per instruction: its pc followed by the instruction itself.
383 for(p=s->text; p != nil; p = p->link)
384 Bprint(ctxt->bso, "\t%#06ux %P\n", (int)p->pc, p);
// Dump of s->p in rows of up to 16 bytes: the row offset, the bytes in hex,
// then the same bytes as characters with '.' standing in for anything outside
// ' '..0x7e. The for header has no step expression.
// NOTE(review): i is presumably advanced inside the body - confirm.
385 for(i=0; i<s->np; ) {
386 Bprint(ctxt->bso, "\t%#06ux", i);
387 for(j=i; j<i+16 && j<s->np; j++)
388 Bprint(ctxt->bso, " %02ux", s->p[j]);
390 Bprint(ctxt->bso, " ");
391 Bprint(ctxt->bso, " ");
392 for(j=i; j<i+16 && j<s->np; j++) {
394 if(' ' <= c && c <= 0x7e)
395 Bprint(ctxt->bso, "%c", c);
397 Bprint(ctxt->bso, ".");
399 Bprint(ctxt->bso, "\n");
402 for(i=0; i<s->nr; i++) {
// The two formats differ only in the addend: hexadecimal when thechar is '5'
// or '9', decimal in the other Bprint.
407 if(ctxt->arch->thechar == '5' || ctxt->arch->thechar == '9')
408 Bprint(ctxt->bso, "\trel %d+%d t=%d %s+%llux\n", (int)r->off, r->siz, r->type, name, (vlong)r->add);
410 Bprint(ctxt->bso, "\trel %d+%d t=%d %s+%lld\n", (int)r->off, r->siz, r->type, name, (vlong)r->add);
// Binary encoding: the symbol's own name and version, then its data and relocations.
416 wrstring(b, s->name);
417 wrint(b, s->version);
421 wrdata(b, s->p, s->np);
424 for(i=0; i<s->nr; i++) {
// Extra fields written only for text symbols.
435 if(s->type == STEXT) {
438 wrint(b, s->nosplit);
// leaf occupies bit 0 and cfunc bit 1 of a single integer.
439 wrint(b, s->leaf | s->cfunc<<1);
// The autom list is walked twice.
// NOTE(review): presumably once to count the entries and once to write them - confirm.
441 for(a = s->autom; a != nil; a = a->link)
444 for(a = s->autom; a != nil; a = a->link) {
446 wrint(b, a->aoffset);
// a->name is tested against NAME_AUTO and NAME_PARAM.
// NOTE(review): presumably any other value reaches the sysfatal below - confirm.
447 if(a->name == NAME_AUTO)
449 else if(a->name == NAME_PARAM)
452 sysfatal("%s: invalid local variable type %d", s->name, a->name);
// pcln table: pcsp, pcfile and pcline blocks, then the counted pcdata blocks,
// the funcdata symbols followed by their offsets, and the file symbols
// (written with wrpathsym so their names go through wrpath).
457 wrdata(b, pc->pcsp.p, pc->pcsp.n);
458 wrdata(b, pc->pcfile.p, pc->pcfile.n);
459 wrdata(b, pc->pcline.p, pc->pcline.n);
460 wrint(b, pc->npcdata);
461 for(i=0; i<pc->npcdata; i++)
462 wrdata(b, pc->pcdata[i].p, pc->pcdata[i].n);
463 wrint(b, pc->nfuncdata);
464 for(i=0; i<pc->nfuncdata; i++)
465 wrsym(b, pc->funcdata[i]);
466 for(i=0; i<pc->nfuncdata; i++)
467 wrint(b, pc->funcdataoff[i]);
469 for(i=0; i<pc->nfile; i++)
470 wrpathsym(ctxt, b, pc->file[i]);
// wrint writes sval to b in the zigzag varint format that the file header
// specifies for all integers.
475 wrint(Biobuf *b, int64 sval)
// Zigzag step: shift the value left one bit and XOR with the sign spread over
// all 64 bits, so values of small magnitude, negative or not, become small
// unsigned numbers.
480 uv = ((uint64)sval<<1) ^ (uint64)(int64)(sval>>63);
// Seven bits are consumed per iteration for as long as v does not fit in seven bits.
483 for(v = uv; v >= 0x80; v >>= 7)
// Everything accumulated in buf up to p is written in a single call.
487 Bwrite(b, buf, p - buf);
// wrstring writes the NUL-terminated string s to b as a data block. The length
// handed to wrdata is strlen(s), so the terminating NUL is not counted.
491 wrstring(Biobuf *b, char *s)
493 wrdata(b, s, strlen(s));
496 // wrpath writes a path just like a string, but on windows, it
497 // translates '\\' to '/' in the process.
// ctxt supplies the windows flag, b is the output buffer and p is the path.
499 wrpath(Link *ctxt, Biobuf *b, char *p)
// Nothing needs translating when ctxt->windows is unset or p holds no backslash.
502 if (!ctxt->windows || strchr(p, '\\') == nil) {
// Otherwise emit the path one byte at a time, substituting '/' for each '\\'.
508 for (i = 0; i < n; i++)
509 Bputc(b, p[i] == '\\' ? '/' : p[i]);
// wrdata is the common writer behind wrstring and the symbol/pcln byte arrays:
// callers pass a pointer v and a byte count n.
// NOTE(review): presumably emits n as an integer followed by the n bytes, per
// the "data block" description in the file header - confirm.
514 wrdata(Biobuf *b, void *v, int n)
// wrpathsym writes a symbol reference for s to b in the same name-then-version
// order as wrsym, except that the name is written with wrpath and so receives
// wrpath's path-separator translation.
521 wrpathsym(Link *ctxt, Biobuf *b, LSym *s)
528 wrpath(ctxt, b, s->name);
529 wrint(b, s->version);
// wrsym writes a symbol reference for s to b: the name as a string followed by
// the version as an integer.
533 wrsym(Biobuf *b, LSym *s)
540 wrstring(b, s->name);
541 wrint(b, s->version);
// Magic byte sequences that ldobjfile expects at the start and at the end of an
// object file. startmagic begins with NUL bytes, so both are compared with
// memcmp rather than with string functions.
544 static char startmagic[] = "\x00\x00go13ld";
545 static char endmagic[] = "\xff\xffgo13ld";
// ldobjfile loads one Go object file from f into ctxt.
// pkg is passed on to addlib and readsym, pn prefixes every diagnostic, and len
// is the expected size of the object, checked at the end against start+len.
// A bad start magic, version byte, end magic or final offset is fatal via sysfatal.
548 ldobjfile(Link *ctxt, Biobuf *f, char *pkg, int64 len, char *pn)
// buf is cleared first so that a short read cannot leave stale bytes behind
// that happen to match the magic; Bread's return value is not inspected here.
557 memset(buf, 0, sizeof buf);
558 Bread(f, buf, sizeof buf);
559 if(memcmp(buf, startmagic, sizeof buf) != 0)
560 sysfatal("%s: invalid file start %x %x %x %x %x %x %x %x", pn, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]);
// Only format version 1 is accepted.
561 if((c = Bgetc(f)) != 1)
562 sysfatal("%s: invalid file version number %d", pn, c);
// NOTE(review): presumably called once per dependency string read from the file - confirm.
568 addlib(ctxt, pkg, pn, lib);
// NOTE(review): presumably repeated until the end-of-symbols marker is seen - confirm.
576 readsym(ctxt, f, pkg, pn);
// Same clear-then-read pattern for the trailing magic.
579 memset(buf, 0, sizeof buf);
580 Bread(f, buf, sizeof buf);
581 if(memcmp(buf, endmagic, sizeof buf) != 0)
582 sysfatal("%s: invalid file end", pn);
// The reader must have consumed exactly len bytes from the starting offset.
584 if(Boffset(f) != start+len)
585 sysfatal("%s: unexpected end at %lld, want %lld", pn, (vlong)Boffset(f), (vlong)(start+len));
// readsym reads one symbol record from f and records it in ctxt.
// The symbol's name is expanded with expandpkg(..., pkg) and looked up with
// linklookup; pn appears in the diagnostics. The fields are read back with
// rdint, rdstring, rddata and rdsym, the readers matching the wr* calls in
// writesym. Malformed or conflicting input is fatal via sysfatal.
589 readsym(Link *ctxt, Biobuf *f, char *pkg, char *pn)
591 int i, j, c, t, v, n, ndata, nreloc, size, dupok;
601 sysfatal("readsym out of sync");
603 name = expandpkg(rdstring(f), pkg);
606 sysfatal("invalid symbol version %d", v);
610 typ = rdsym(ctxt, f, pkg);
611 rddata(f, &data, &ndata);
616 s = linklookup(ctxt, name, v);
// s already carries a definition (its type is neither 0 nor SXREF): work out
// how the incoming definition relates to the existing one.
618 if(s->type != 0 && s->type != SXREF) {
// Incoming definition is a data/bss symbol with no bytes and no relocations.
619 if((t == SDATA || t == SBSS || t == SNOPTRBSS) && ndata == 0 && nreloc == 0) {
622 if(typ != nil && s->gotype == nil)
// Existing definition is a data/bss symbol with no bytes and no relocations.
626 if((s->type == SDATA || s->type == SBSS || s->type == SNOPTRBSS) && s->np == 0 && s->nr == 0)
// A clash is fatal unless the existing symbol is bss or either side is dupok.
628 if(s->type != SBSS && s->type != SNOPTRBSS && !dupok && !s->dupok)
629 sysfatal("duplicate symbol %s (types %d and %d) in %s and %s", s->name, s->type, t, s->file, pn);
632 s = linknewsym(ctxt, ".dup", ndup++); // scratch
639 sysfatal("bad sxref");
641 sysfatal("missing type for %s in %s", name, pn);
642 if(t == SBSS && (s->type == SRODATA || s->type == SNOPTRBSS))
647 if(typ != nil) // if bss sym defined multiple times, take type from any one def
649 if(dup != nil && typ != nil)
// Relocation array sized from the count read from the file.
655 s->r = emallocz(nreloc * sizeof s->r[0]);
658 for(i=0; i<nreloc; i++) {
665 r->sym = rdsym(ctxt, f, pkg);
666 r->xsym = rdsym(ctxt, f, pkg);
// The length 10 covers the whole prefix when the source is UTF-8: eight ASCII
// bytes for "gclocals" plus two bytes for the middle dot.
670 if(s->np > 0 && dup != nil && dup->np > 0 && strncmp(s->name, "gclocals·", 10) == 0) {
671 // content-addressed garbage collection liveness bitmap symbol.
672 // double check for hash collisions.
673 if(s->np != dup->np || memcmp(s->p, dup->p, s->np) != 0)
674 sysfatal("dupok hash collision for %s in %s and %s", s->name, s->file, pn);
// Extra fields present only for text symbols.
677 if(s->type == STEXT) {
679 s->locals = rdint(f);
680 s->nosplit = rdint(f);
// One zeroed record per local: symbol reference, offset, Go type reference.
686 a = emallocz(sizeof *a);
687 a->asym = rdsym(ctxt, f, pkg);
688 a->aoffset = rdint(f);
690 a->gotype = rdsym(ctxt, f, pkg);
// pcln table, read in the order writesym wrote it: pcsp, pcfile, pcline,
// pcdata blocks, funcdata symbols then offsets, file symbols.
695 s->pcln = emallocz(sizeof *s->pcln);
697 rddata(f, &pc->pcsp.p, &pc->pcsp.n);
698 rddata(f, &pc->pcfile.p, &pc->pcfile.n);
699 rddata(f, &pc->pcline.p, &pc->pcline.n);
701 pc->pcdata = emallocz(n * sizeof pc->pcdata[0]);
704 rddata(f, &pc->pcdata[i].p, &pc->pcdata[i].n);
// funcdata and funcdataoff are parallel arrays of the same length n.
706 pc->funcdata = emallocz(n * sizeof pc->funcdata[0]);
707 pc->funcdataoff = emallocz(n * sizeof pc->funcdataoff[0]);
710 pc->funcdata[i] = rdsym(ctxt, f, pkg);
712 pc->funcdataoff[i] = rdint(f);
714 pc->file = emallocz(n * sizeof pc->file[0]);
717 pc->file[i] = rdsym(ctxt, f, pkg);
721 sysfatal("symbol %s listed multiple times", s->name);
// Link s after the current tail of the context's text list.
724 ctxt->etextp->next = s;
// Human-readable listing on ctxt->bso, in the same shape as the one in writesym.
// NOTE(review): presumably emitted only when a debug flag is set - confirm.
732 Bprint(ctxt->bso, "%s ", s->name);
734 Bprint(ctxt->bso, "v=%d ", s->version);
736 Bprint(ctxt->bso, "t=%d ", s->type);
738 Bprint(ctxt->bso, "dupok ");
740 Bprint(ctxt->bso, "cfunc ");
742 Bprint(ctxt->bso, "nosplit ");
743 Bprint(ctxt->bso, "size=%lld value=%lld", (vlong)s->size, (vlong)s->value);
745 Bprint(ctxt->bso, " args=%#llux locals=%#llux", (uvlong)s->args, (uvlong)s->locals);
746 Bprint(ctxt->bso, "\n");
// Rows of up to 16 bytes: offset, hex bytes, then printable characters with
// '.' for anything outside ' '..0x7e.
747 for(i=0; i<s->np; ) {
748 Bprint(ctxt->bso, "\t%#06ux", i);
749 for(j=i; j<i+16 && j<s->np; j++)
750 Bprint(ctxt->bso, " %02ux", s->p[j]);
752 Bprint(ctxt->bso, " ");
753 Bprint(ctxt->bso, " ");
754 for(j=i; j<i+16 && j<s->np; j++) {
756 if(' ' <= c && c <= 0x7e)
757 Bprint(ctxt->bso, "%c", c);
759 Bprint(ctxt->bso, ".");
761 Bprint(ctxt->bso, "\n");
764 for(i=0; i<s->nr; i++) {
// NOTE(review): r->sym is dereferenced unconditionally here, whereas the
// matching line in writesym prints a separate name variable. The file header
// says an empty name stands for a nil LSym*, so confirm r->sym cannot be nil
// for a relocation read from the file.
766 Bprint(ctxt->bso, "\trel %d+%d t=%d %s+%lld\n", (int)r->off, r->siz, r->type, r->sym->name, (vlong)r->add);
779 for(shift = 0;; shift += 7) {
781 sysfatal("corrupt input");
783 uv |= (uint64)(c & 0x7F) << shift;
788 return (int64)(uv>>1) ^ ((int64)((uint64)uv<<63)>>63);
// rddata is the reader used for the symbol's data and for each pcln byte array;
// callers pass the addresses of a byte pointer and of its length.
// NOTE(review): presumably reads an integer length followed by that many bytes
// (the "data block" layout in the file header) and stores them through pp and
// np - confirm.
804 rddata(Biobuf *f, uchar **pp, int *np)
812 rdsym(Link *ctxt, Biobuf *f, char *pkg)
828 s = linklookup(ctxt, expandpkg(p, pkg), v);
830 if(v == 0 && s->name[0] == '$' && s->type == 0) {
831 if(strncmp(s->name, "$f32.", 5) == 0) {
833 i32 = strtoul(s->name+5, nil, 16);
835 adduint32(ctxt, s, i32);
837 } else if(strncmp(s->name, "$f64.", 5) == 0 || strncmp(s->name, "$i64.", 5) == 0) {
839 i64 = strtoull(s->name+5, nil, 16);
841 adduint64(ctxt, s, i64);