[PATCH 00/19] CTF linking support

classic Classic list List threaded Threaded
25 messages Options
12
Reply | Threaded
Open this post in threaded view
|

[PATCH 00/19] CTF linking support

Nick Alcock
This is the beginning of support for linking CTF sections.  The type
deduplicator is not very good, creation of "subsections" for types and variables
with conflicting definitions in distinct TUs hardly ever happens even when it
should, and mingw might well be in trouble because I am using tmpfile() and am
not pulling in gnulib because I haven't figured out how to do it in an
Automake-using binutils project yet :) however, we only use tmpfile() when we
encounter conflicting definitions, so I figure we can fix that problem in the
next series.  It may well also be broken on non-ELF platforms again: I'm not
even sure what an example of such a platform is, let alone how to target one
without piles of system headers etc that I don't have.  (See patch 19 for
questions on this.)

This is *not* ready to go upstream yet -- it is broken on non-ELF.  I mean to
fix that soon, but I thought I should provide some indication of the sort of
thing I'm doing, regardless.  (The code changes to work on non-ELF will probably be
quite small, moving a bit of code out of ldlang.c into the elf32 emulation via
yet another callback.)

Also this is currently much too slow (10s to link ld itself, for instance), but
you only take the speed hit when CTF sections are present, and the delay is
entirely down to the thing I'm using as a deduplicator right now, which was
really not designed for it and will be rewritten.

Most of the job of linking is done by code in libctf itself, which I am the
maintainer of: I'm happy to have people look at it but I'm fairly confident
about what I'm doing in there. But the last patch... the last patch ties it
into the BFD and ld linking machinery. I really *need* review of that one,
and probably of the one before it as well since that too touches bfd. It
works where none of the dozens of other approaches I tried came close to
working, but I have no idea if it is the right way to do things at all.
Extensive questions for reviewers are in the commit log for that patch.

But despite the caveats in patch 19, in conjunction with a CTF-capable GCC,
it does work, without warnings or leaks, and merges the type sections down
quite satisfactorily:

oranix@loom 551 % size -A /tmp/gcc/bin/ld
/tmp/gcc/bin/ld  :
section                  size      addr
.interp                    28   4194984
.note.gnu.build-id         36   4195012
.note.ABI-tag              32   4195048
.gnu.hash                 172   4195080
.dynsym                  3264   4195256
.dynstr                  1116   4198520
.gnu.version              272   4199636
.gnu.version_r            144   4199912
.rela.dyn                 216   4200056
.rela.plt                2808   4200272
.init                      23   4206592
.plt                     1888   4206624
.text                  949825   4208512
.fini                       9   5158340
.rodata               1447296   5160960
.eh_frame_hdr           19172   6608256
.eh_frame              122760   6627432
.init_array                 8   6757888
.fini_array                 8   6757896
.dynamic                  480   6757904
.got                       16   6758384
.got.plt                  960   6758400
.data                   25136   6759360
.bss                    22976   6784512
.comment                   68         0
.debug_aranges           6320         0
.debug_info           4323894         0
.debug_abbrev          144836         0
.debug_line            750267         0
.debug_str             231568         0
.debug_loc            2069864         0
.debug_ranges          179760         0
.ctf                   212598   6815680
Total                10517820

oranix@loom 552 % PATH=/tmp/gcc/bin:$PATH objdump --ctf=.ctf /tmp/gcc/bin/ld

/tmp/gcc/bin/ld:     file format elf64-x86-64

Contents of CTF section .ctf:

  Header:
    Magic number: dff2
    Version: 4 (CTF_VERSION_3)
    Flags: 0x1 (CTF_F_COMPRESS)
    Variable section:   0x0 -- 0xedf (0xee0 bytes)
    Type section:       0xee0 -- 0x133db3 (0x132ed4 bytes)
    String section:     0x133db4 -- 0x14cbfc (0x18e49 bytes)

  Labels:

  Data objects:

  Function objects:

  Variables:
    _xexit_cleanup ->  a7e: void (*)() (size 0x8) -> a7d: void () (size 0x0)
    bfd_x86_64_arch ->  53ee: const struct bfd_arch_info (size 0x50) -> 238: struct bfd_arch_info (size 0x50)
    iamcu_elf32_vec ->  afe9: const struct bfd_target (size 0x370) -> 286: struct bfd_target (size 0x370)
    bfd_last_cache ->  c9b6: struct bfd * (size 0x8) -> 1f4: struct bfd (size 0x6)
    _CTF_NULLSTR ->  39bf: const char [0] (size 0x0)
[...]

  Types:
     1: long int (size 0x8)
        [0x0] (ID 0x1) (kind 1) long int  (aligned at 0x8, format 0x1, offset:bits 0x0:0x40)
     2: ptrdiff_t (size 0x8) -> 1: long int (size 0x8)
        [0x0] (ID 0x2) (kind 10) ptrdiff_t  (aligned at 0x8)
     3: long unsigned int (size 0x8)
        [0x0] (ID 0x3) (kind 1) long unsigned int  (aligned at 0x8, format 0x0, offset:bits 0x0:0x40)
     4: size_t (size 0x8) -> 3: long unsigned int (size 0x8)
        [0x0] (ID 0x4) (kind 10) size_t  (aligned at 0x8)
     5: int (size 0x4)
        [0x0] (ID 0x5) (kind 1) int  (aligned at 0x4, format 0x1, offset:bits 0x0:0x20)
     6: wchar_t (size 0x4) -> 5: int (size 0x4)
        [0x0] (ID 0x6) (kind 10) wchar_t  (aligned at 0x4)
     7: struct  (size 0x20)
        [0x0] (ID 0x7) (kind 6) struct   (aligned at 0x8)
            [0x0] (ID 0x8) (kind 1) long long int __max_align_ll (aligned at 0x8, format 0x1, offset:bits 0x0:0x40)
            [0x80] (ID 0x9) (kind 2) long double __max_align_ld (aligned at 0x10, format 0x6, offset:bits 0x0:0x80)
     8: long long int (size 0x8)
        [0x0] (ID 0x8) (kind 1) long long int  (aligned at 0x8, format 0x1, offset:bits 0x0:0x40)
     9: long double (size 0x10)
        [0x0] (ID 0x9) (kind 2) long double  (aligned at 0x10, format 0x6, offset:bits 0x0:0x80)
     a: struct  (size 0x20)
        [0x0] (ID 0xa) (kind 6) struct   (aligned at 0x8)
            [0x0] (ID 0x8) (kind 1) long long int __max_align_ll (aligned at 0x8, format 0x1, offset:bits 0x0:0x40)
            [0x80] (ID 0x9) (kind 2) long double __max_align_ld (aligned at 0x10, format 0x6, offset:bits 0x0:0x80)
[...]
     668: struct elf_internal_rela * (size 0x8) -> 632: struct elf_internal_rela (size 0x18)
        [0x0] (ID 0x668) (kind 3) struct elf_internal_rela *  (aligned at 0x8)

  Strings:
    0:
    1: A
    3: AOUTHDR
    b: AOUTHDR64
    15: AddressOfEntryPoint
    29: Age
    2d: B

(Yes, that's right: 6.4MiB of debuginfo, a meg of rodata, a meg of text, and
only 200KiB of CTF: 129 bytes per type after compression even if you include the
strtab, even counting all the structure member names! And this is with a *bad*
deduplicator that emits piles of dups for no especially good reason, and a type
table layout I already know how to shrink quite a lot more, and probably we
shouldn't be emitting variable entries for static variables either: those will
probably(?) go away when I do the function and variable info section linking
work. And I'm hoping to add lzma as an option as well.)

Hans-Peter Nilsson (1):
  libctf: make it compile for old glibc

Nick Alcock (18):
  libctf, include: ChangeLog format fixes
  libctf: allow the header to change between versions
  libctf, binutils: dump the CTF header
  libctf, bfd: fix ctf_bfdopen_ctfsect opening symbol and string
    sections
  libctf: add the object index and function index sections
  binutils: readelf: when dumping CTF, load strtab and symtab
    automatically
  binutils: objdump does not take --ctf-symbols or --ctf-strings options
  libctf: Add iteration over non-root types
  libctf: support getting strings from the ELF strtab
  libctf: write CTF files to memory, and CTF archives to fds
  libctf: fix memory leak on ctf_compress_write error path
  libctf: dump: support non-root type dumping
  libctf: dump: check the right error values when dumping functions
  libctf: add the ctf_link machinery
  libctf: map from old to corresponding newly-added types in
    ctf_add_type
  libctf: add linking of the variable section
  libctf: get rid of a disruptive public include of <sys/param.h>
  bfd: new functions for getting strings out of a strtab
  bfd, ld: add CTF section linking

--
2.22.0.238.g049a27acdc

Reply | Threaded
Open this post in threaded view
|

[PATCH 01/19] libctf: make it compile for old glibc

Nick Alcock
From: Hans-Peter Nilsson <[hidden email]>

With a glibc before 2.9 (such as 2.8), there's <endian.h> but no
htole64 or le64toh, so you get, compiling binutils for any target:

libtool: link: gcc -W -Wall -Wstrict-prototypes -Wmissing-prototypes \
-Wshadow -Werror -I/x/binutils/../zlib -g -O2 -o objdump \
objdump.o dwarf.o prdbg.o rddbg.o debug.o stabs.o rdcoff.o \
bucomm.o version.o filemode.o elfcomm.o  ../opcodes/.libs/libopcodes.a \
../libctf/libctf.a ../bfd/.libs/libbfd.a -L/x/obj/b/zlib -lz ../libiberty/libiberty.a -ldl
../libctf/libctf.a(ctf-archive.o): In function `ctf_archive_raw_iter_internal':
/x/src/libctf/ctf-archive.c:543: undefined reference to `le64toh'
/x/src/libctf/ctf-archive.c:550: undefined reference to `le64toh'
/x/src/libctf/ctf-archive.c:551: undefined reference to `le64toh'
/x/src/libctf/ctf-archive.c:551: undefined reference to `le64toh'
/x/src/libctf/ctf-archive.c:554: undefined reference to `le64toh'
../libctf/libctf.a(ctf-archive.o):/x/src/libctf/ctf-archive.c:545: more undefined references to `le64toh' follow
(etc)

Also, I see no bswap_identity_64 *anywhere* except in libctf/swap.h
(including current glibc) and I don't think calling an "identity"-
function is better than just plain "#define foo(x) (x)" anyway.
(Where does the idea of a byteswap.h bswap_identity_64 come from?)

Speaking of that, I should mention that I instrumented the condition
to observe that the WORDS_BIGENDIAN case passes too for a presumed
big-endian target and glibc-2.8: there is a bswap_64 present for that
version.  Curiously, no test-case regressed with that instrumentation.

For the record, constructing binary blobs using text source to run
tests on, can be done by linking to --oformat binary (with most ELF
targets), but I guess that's seen as unnecessarily roundabout; perhaps
checking in binary files in the test-suite would be ok these days.
[...]

[nca: trimmed commit log slightly, updated changelog]

libctf/
        * ctf-endian.h: Don't assume htole64 and le64toh are always
        present if HAVE_ENDIAN_H; also check if htole64 is defined.
        [! WORDS_BIGENDIAN] (htole64, le64toh): Define as identity,
        not bswap_identity_64.
---
 libctf/ctf-endian.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libctf/ctf-endian.h b/libctf/ctf-endian.h
index ec177d1bdd..f1cc527a08 100644
--- a/libctf/ctf-endian.h
+++ b/libctf/ctf-endian.h
@@ -24,10 +24,10 @@
 #include <stdint.h>
 #include "swap.h"
 
-#ifndef HAVE_ENDIAN_H
+#if !defined (HAVE_ENDIAN_H) || !defined (htole64)
 #ifndef WORDS_BIGENDIAN
-# define htole64(x) bswap_identity_64 ((x))
-# define le64toh(x) bswap_identity_64 ((x))
+# define htole64(x) (x)
+# define le64toh(x) (x)
 #else
 # define htole64(x) bswap_64 ((x))
 # define le64toh(x) bswap_64 ((x))
--
2.22.0.238.g049a27acdc

Reply | Threaded
Open this post in threaded view
|

[PATCH 02/19] libctf: allow the header to change between versions

Nick Alcock
In reply to this post by Nick Alcock
libctf supports dynamic upgrading of the type table as file format
versions change, but before now has not supported changes to the CTF
header.  Doing this is complicated by the baroque storage method used:
the CTF header is kept prepended to the rest of the CTF data, just as
when read from the file, and written out from there, and is
endian-flipped in place.

This makes accessing it needlessly hard and makes it almost impossible
to make the header larger if we add fields.  The general storage
machinery around the malloced ctf pointer (the 'ctf_base') is also
overcomplicated: the pointer is sometimes malloced locally and sometimes
assigned from a parameter, so freeing it requires checking to see if
that parameter was used, needlessly coupling ctf_bufopen and
ctf_file_close together.

So split the header out into a new ctf_file_t.ctf_header, which is
written out explicitly: squeeze it out of the CTF buffer whenever we
reallocate it, and use ctf_file_t.ctf_buf to skip past the header when
we do not need to reallocate (when no upgrading or endian-flipping is
required).  We now track whether the CTF base can be freed explicitly
via a new ctf_dynbase pointer which is non-NULL only when freeing is
possible.

With all this done, we can upgrade the header on the fly and add new
fields as desired, via a new upgrade_header function in ctf-open.
As with other forms of upgrading, libctf upgrades older headers
automatically to the latest supported version at open time.

For a first use of this field, we add a new string field cth_cuname, and
a corresponding setter/getter pair ctf_cuname_set and ctf_cuname: this
is used by debuggers to determine whether a CTF section's types relate
to a single compilation unit, or to all compilation units in the
program.  (Types with ambiguous definitions in different CUs have only
one of these types placed in the top-level shared .ctf container: the
rest are placed in much smaller per-CU containers, which have the shared
container as their parent.  Since CTF must be useful in the absence of
DWARF, we store the names of the relevant CUs ourselves, so the debugger
can look them up.)

include/
        * ctf-api.h (ctf_cuname): New function.
        (ctf_cuname_set): Likewise.
        * ctf.h: Improve comment around upgrading, no longer
        implying that v2 is the target of upgrades (it is v3 now).
        (ctf_header_v2_t): New, old-format header for backward
        compatibility.
        (ctf_header_t): Add cth_cuname: this is the first of several
        header changes in format v3.
libctf/
        * ctf-impl.h (ctf_file_t): New fields ctf_header, ctf_dynbase,
        ctf_cuname, ctf_dyncuname: ctf_base and ctf_buf are no longer const.
        * ctf-open.c (ctf_set_base): Preserve the gap between ctf_buf and
        ctf_base: do not assume that it is always sizeof (ctf_header_t).
        Print out ctf_cuname: only print out ctf_parname if set.
        (ctf_free_base): Removed, ctf_base is no longer freed: free
        ctf_dynbase instead.
        (ctf_set_version): Fix spacing.
        (upgrade_header): New, in-place header upgrading.
        (upgrade_types): Rename to...
        (upgrade_types_v1): ... this.  Free ctf_dynbase, not ctf_base.  No
        longer track old and new headers separately.  No longer allow for
        header sizes explicitly: squeeze the headers out on upgrade (they
        are preserved in fp->ctf_header).  Set ctf_dynbase, ctf_base and
        ctf_buf explicitly.  Use ctf_free, not ctf_free_base.
        (upgrade_types): New, also handle ctf_parmax updating.
        (flip_header): Flip ctf_cuname.
        (flip_types): Flip BUF explicitly rather than deriving BUF from
        BASE.
        (ctf_bufopen): Store the header in fp->ctf_header.  Correct minimum
        required alignment of objtoff and funcoff.  No longer store it in
        the ctf_buf unless that buf is derived unmodified from the input.
        Set ctf_dynbase where ctf_base is dynamically allocated. Drop locals
        that duplicate fields in ctf_file: move allocation of ctf_file
        further up instead.  Call upgrade_header as needed.  Move
        version-specific ctf_parmax initialization into upgrade_types.  More
        concise error handling.
        (ctf_file_close): No longer test for null pointers before freeing.
        Free ctf_dyncuname, ctf_dynbase, and ctf_header.  Do not call
        ctf_free_base.
        (ctf_cuname): New.
        (ctf_cuname_set): New.
        * ctf-create.c (ctf_update): Populate ctf_cuname.
        (ctf_gzwrite): Write out the header explicitly.  Remove obsolescent
        comment.
        (ctf_write): Likewise.
        (ctf_compress_write): Get the header from ctf_header, not ctf_base.
        Fix the compression length: fp->ctf_size never counted the CTF
        header.  Simplify the compress call accordingly.
---
 include/ctf-api.h   |   2 +
 include/ctf.h       |  30 +++-
 libctf/ctf-create.c |  49 ++++--
 libctf/ctf-impl.h   |   8 +-
 libctf/ctf-open.c   | 367 +++++++++++++++++++++++++-------------------
 5 files changed, 278 insertions(+), 178 deletions(-)

diff --git a/include/ctf-api.h b/include/ctf-api.h
index 3acbc91b9a..42c3c9a319 100644
--- a/include/ctf-api.h
+++ b/include/ctf-api.h
@@ -258,6 +258,8 @@ extern void ctf_file_close (ctf_file_t *);
 extern int ctf_arc_write (const char *, ctf_file_t **, size_t,
   const char **, size_t);
 
+extern const char *ctf_cuname (ctf_file_t *);
+extern void ctf_cuname_set (ctf_file_t *, const char *);
 extern ctf_file_t *ctf_parent_file (ctf_file_t *);
 extern const char *ctf_parent_name (ctf_file_t *);
 extern void ctf_parent_name_set (ctf_file_t *, const char *);
diff --git a/include/ctf.h b/include/ctf.h
index 2b357816ba..7e00005d27 100644
--- a/include/ctf.h
+++ b/include/ctf.h
@@ -126,11 +126,26 @@ typedef struct ctf_preamble
   unsigned char ctp_flags; /* Flags (see below).  */
 } ctf_preamble_t;
 
+typedef struct ctf_header_v2
+{
+  ctf_preamble_t cth_preamble;
+  uint32_t cth_parlabel; /* Ref to name of parent lbl uniq'd against.  */
+  uint32_t cth_parname; /* Ref to basename of parent.  */
+  uint32_t cth_lbloff; /* Offset of label section.  */
+  uint32_t cth_objtoff; /* Offset of object section.  */
+  uint32_t cth_funcoff; /* Offset of function section.  */
+  uint32_t cth_varoff; /* Offset of variable section.  */
+  uint32_t cth_typeoff; /* Offset of type section.  */
+  uint32_t cth_stroff; /* Offset of string section.  */
+  uint32_t cth_strlen; /* Length of string section in bytes.  */
+} ctf_header_v2_t;
+
 typedef struct ctf_header
 {
   ctf_preamble_t cth_preamble;
   uint32_t cth_parlabel; /* Ref to name of parent lbl uniq'd against.  */
   uint32_t cth_parname; /* Ref to basename of parent.  */
+  uint32_t cth_cuname; /* Ref to CU name (may be 0).  */
   uint32_t cth_lbloff; /* Offset of label section.  */
   uint32_t cth_objtoff; /* Offset of object section.  */
   uint32_t cth_funcoff; /* Offset of function section.  */
@@ -148,13 +163,14 @@ typedef struct ctf_header
 
 /* Data format version number.  */
 
-/* v1 upgraded to v2 is not quite the same as native v2 (the boundary between
-   parent and child types is different), and you can write it out again via
-   ctf_compress_write(), so we must track whether the thing was originally v1 or
-   not.  If we were writing the header from scratch, we would add a *pair* of
-   version number fields to allow for this, but this will do for now.  (A flag
-   will not do, because we need to encode both the version we came from and the
-   version we went to, not just "we were upgraded".) */
+/* v1 upgraded to a later version is not quite the same as the native form,
+   because the boundary between parent and child types is different but not
+   recorded anywhere, and you can write it out again via ctf_compress_write(),
+   so we must track whether the thing was originally v1 or not.  If we were
+   writing the header from scratch, we would add a *pair* of version number
+   fields to allow for this, but this will do for now.  (A flag will not do,
+   because we need to encode both the version we came from and the version we
+   went to, not just "we were upgraded".) */
 
 # define CTF_VERSION_1 1
 # define CTF_VERSION_1_UPGRADED_3 2
diff --git a/libctf/ctf-create.c b/libctf/ctf-create.c
index 24ea114f29..4ea288e451 100644
--- a/libctf/ctf-create.c
+++ b/libctf/ctf-create.c
@@ -294,6 +294,8 @@ ctf_update (ctf_file_t *fp)
   hdrp = (ctf_header_t *) buf;
   if ((fp->ctf_flags & LCTF_CHILD) && (fp->ctf_parname != NULL))
     ctf_str_add_ref (fp, fp->ctf_parname, &hdrp->cth_parname);
+  if (fp->ctf_cuname != NULL)
+    ctf_str_add_ref (fp, fp->ctf_cuname, &hdrp->cth_cuname);
 
   /* Work over the variable list, translating everything into ctf_varent_t's and
      prepping the string table.  */
@@ -1919,15 +1921,26 @@ ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type)
   return dst_type;
 }
 
-/* Write the compressed CTF data stream to the specified gzFile descriptor.
-   This is useful for saving the results of dynamic CTF containers.  */
+/* Write the compressed CTF data stream to the specified gzFile descriptor.  */
 int
 ctf_gzwrite (ctf_file_t *fp, gzFile fd)
 {
-  const unsigned char *buf = fp->ctf_base;
-  ssize_t resid = fp->ctf_size;
+  const unsigned char *buf;
+  ssize_t resid;
   ssize_t len;
 
+  resid = sizeof (ctf_header_t);
+  buf = (unsigned char *) fp->ctf_header;
+  while (resid != 0)
+    {
+      if ((len = gzwrite (fd, buf, resid)) <= 0)
+ return (ctf_set_errno (fp, errno));
+      resid -= len;
+      buf += len;
+    }
+
+  resid = fp->ctf_size;
+  buf = fp->ctf_buf;
   while (resid != 0)
     {
       if ((len = gzwrite (fd, buf, resid)) <= 0)
@@ -1950,12 +1963,12 @@ ctf_compress_write (ctf_file_t *fp, int fd)
   ctf_header_t *hp = &h;
   ssize_t header_len = sizeof (ctf_header_t);
   ssize_t compress_len;
-  size_t max_compress_len = compressBound (fp->ctf_size - header_len);
+  size_t max_compress_len = compressBound (fp->ctf_size);
   ssize_t len;
   int rc;
   int err = 0;
 
-  memcpy (hp, fp->ctf_base, header_len);
+  memcpy (hp, fp->ctf_header, header_len);
   hp->cth_flags |= CTF_F_COMPRESS;
 
   if ((buf = ctf_alloc (max_compress_len)) == NULL)
@@ -1963,8 +1976,7 @@ ctf_compress_write (ctf_file_t *fp, int fd)
 
   compress_len = max_compress_len;
   if ((rc = compress (buf, (uLongf *) &compress_len,
-      fp->ctf_base + header_len,
-      fp->ctf_size - header_len)) != Z_OK)
+      fp->ctf_buf, fp->ctf_size)) != Z_OK)
     {
       ctf_dprintf ("zlib deflate err: %s\n", zError (rc));
       err = ctf_set_errno (fp, ECTF_COMPRESS);
@@ -2000,18 +2012,29 @@ ret:
   return err;
 }
 
-/* Write the uncompressed CTF data stream to the specified file descriptor.
-   This is useful for saving the results of dynamic CTF containers.  */
+/* Write the uncompressed CTF data stream to the specified file descriptor.  */
 int
 ctf_write (ctf_file_t *fp, int fd)
 {
-  const unsigned char *buf = fp->ctf_base;
-  ssize_t resid = fp->ctf_size;
+  const unsigned char *buf;
+  ssize_t resid;
   ssize_t len;
 
+  resid = sizeof (ctf_header_t);
+  buf = (unsigned char *) fp->ctf_header;
+  while (resid != 0)
+    {
+      if ((len = write (fd, buf, resid)) <= 0)
+ return (ctf_set_errno (fp, errno));
+      resid -= len;
+      buf += len;
+    }
+
+  resid = fp->ctf_size;
+  buf = fp->ctf_buf;
   while (resid != 0)
     {
-      if ((len = write (fd, buf, resid)) < 0)
+      if ((len = write (fd, buf, resid)) <= 0)
  return (ctf_set_errno (fp, errno));
       resid -= len;
       buf += len;
diff --git a/libctf/ctf-impl.h b/libctf/ctf-impl.h
index b51118cc6f..1cfab431ca 100644
--- a/libctf/ctf-impl.h
+++ b/libctf/ctf-impl.h
@@ -217,6 +217,7 @@ typedef struct ctf_str_atom_ref
 struct ctf_file
 {
   const ctf_fileops_t *ctf_fileops; /* Version-specific file operations.  */
+  struct ctf_header *ctf_header;    /* The header from this CTF file.  */
   ctf_sect_t ctf_data;    /* CTF data from object file.  */
   ctf_sect_t ctf_symtab;    /* Symbol table from object file.  */
   ctf_sect_t ctf_strtab;    /* String table from object file.  */
@@ -230,8 +231,9 @@ struct ctf_file
   ctf_strs_t ctf_str[2];    /* Array of string table base and bounds.  */
   ctf_dynhash_t *ctf_str_atoms;  /* Hash table of ctf_str_atoms_t.  */
   uint64_t ctf_str_num_refs;  /* Number of refs to cts_str_atoms.  */
-  const unsigned char *ctf_base;  /* Base of CTF header + uncompressed buffer.  */
-  const unsigned char *ctf_buf;  /* Uncompressed CTF data buffer.  */
+  unsigned char *ctf_base;  /* CTF file pointer.  */
+  unsigned char *ctf_dynbase;  /* Freeable CTF file pointer. */
+  unsigned char *ctf_buf;  /* Uncompressed CTF data buffer.  */
   size_t ctf_size;  /* Size of CTF header + uncompressed data.  */
   uint32_t *ctf_sxlate;  /* Translation table for symtab entries.  */
   unsigned long ctf_nsyms;  /* Number of entries in symtab xlate table.  */
@@ -241,6 +243,8 @@ struct ctf_file
   unsigned long ctf_nvars;  /* Number of variables in ctf_vars.  */
   unsigned long ctf_typemax;  /* Maximum valid type ID number.  */
   const ctf_dmodel_t *ctf_dmodel; /* Data model pointer (see above).  */
+  const char *ctf_cuname;  /* Compilation unit name (if any).  */
+  char *ctf_dyncuname;  /* Dynamically allocated name of CU.  */
   struct ctf_file *ctf_parent;  /* Parent CTF container (if any).  */
   const char *ctf_parlabel;  /* Label in parent container (if any).  */
   const char *ctf_parname;  /* Basename of parent (if any).  */
diff --git a/libctf/ctf-open.c b/libctf/ctf-open.c
index 8fc854ae67..46fb42e1e3 100644
--- a/libctf/ctf-open.c
+++ b/libctf/ctf-open.c
@@ -309,14 +309,18 @@ init_symtab (ctf_file_t *fp, const ctf_header_t *hp,
   return 0;
 }
 
-/* Set the CTF base pointer and derive the buf pointer from it, initializing
-   everything in the ctf_file that depends on the base or buf pointers.  */
+/* Reset the CTF base pointer and derive the buf pointer from it, initializing
+   everything in the ctf_file that depends on the base or buf pointers.
+
+   The original gap between the buf and base pointers, if any -- the original,
+   unconverted CTF header -- is kept, but its contents are not specified and are
+   never used.  */
 
 static void
-ctf_set_base (ctf_file_t *fp, const ctf_header_t *hp, void *base)
+ctf_set_base (ctf_file_t *fp, const ctf_header_t *hp, unsigned char *base)
 {
+  fp->ctf_buf = base + (fp->ctf_buf - fp->ctf_base);
   fp->ctf_base = base;
-  fp->ctf_buf = fp->ctf_base + sizeof (ctf_header_t);
   fp->ctf_vars = (ctf_varent_t *) ((const char *) fp->ctf_buf +
    hp->cth_varoff);
   fp->ctf_nvars = (hp->cth_typeoff - hp->cth_varoff) / sizeof (ctf_varent_t);
@@ -336,27 +340,17 @@ ctf_set_base (ctf_file_t *fp, const ctf_header_t *hp, void *base)
     fp->ctf_parlabel = ctf_strptr (fp, hp->cth_parlabel);
   if (hp->cth_parname != 0)
     fp->ctf_parname = ctf_strptr (fp, hp->cth_parname);
-
-  ctf_dprintf ("ctf_set_base: parent name %s (label %s)\n",
-       fp->ctf_parname ? fp->ctf_parname : "<NULL>",
+  if (hp->cth_cuname != 0)
+    fp->ctf_cuname = ctf_strptr (fp, hp->cth_cuname);
+
+  if (fp->ctf_cuname)
+    ctf_dprintf ("ctf_set_base: CU name %s\n", fp->ctf_cuname);
+  if (fp->ctf_parname)
+    ctf_dprintf ("ctf_set_base: parent name %s (label %s)\n",
+       fp->ctf_parname,
        fp->ctf_parlabel ? fp->ctf_parlabel : "<NULL>");
 }
 
-/* Free a ctf_base pointer: the pointer passed, or (if NULL) fp->ctf_base.  */
-static void
-ctf_free_base (ctf_file_t *fp, unsigned char *ctf_base)
-{
-  unsigned char *base;
-
-  if (ctf_base)
-      base = ctf_base;
-  else
-      base = (unsigned char *) fp->ctf_base;
-
-  if (base != fp->ctf_data.cts_data && base != NULL)
-    ctf_free (base);
-}
-
 /* Set the version of the CTF file. */
 
 /* When this is reset, LCTF_* changes behaviour, but there is no guarantee that
@@ -364,14 +358,32 @@ ctf_free_base (ctf_file_t *fp, unsigned char *ctf_base)
    caller must ensure this has been done in advance.  */
 
 static void
-ctf_set_version (ctf_file_t * fp, ctf_header_t * cth, int ctf_version)
+ctf_set_version (ctf_file_t *fp, ctf_header_t *cth, int ctf_version)
 {
   fp->ctf_version = ctf_version;
   cth->cth_version = ctf_version;
   fp->ctf_fileops = &ctf_fileops[ctf_version];
 }
 
-/* Upgrade the type table to CTF_VERSION_3 (really CTF_VERSION_1_UPGRADED_3).
+
+/* Upgrade the header to CTF_VERSION_3.  The upgrade is done in-place.  */
+static void
+upgrade_header (ctf_header_t *hp)
+{
+  ctf_header_v2_t *oldhp = (ctf_header_v2_t *) hp;
+
+  hp->cth_strlen = oldhp->cth_strlen;
+  hp->cth_stroff = oldhp->cth_stroff;
+  hp->cth_typeoff = oldhp->cth_typeoff;
+  hp->cth_varoff = oldhp->cth_varoff;
+  hp->cth_funcoff = oldhp->cth_funcoff;
+  hp->cth_objtoff = oldhp->cth_objtoff;
+  hp->cth_lbloff = oldhp->cth_lbloff;
+  hp->cth_cuname = 0; /* No CU name.  */
+}
+
+/* Upgrade the type table to CTF_VERSION_3 (really CTF_VERSION_1_UPGRADED_3)
+   from CTF_VERSION_1.
 
    The upgrade is not done in-place: the ctf_base is moved.  ctf_strptr() must
    not be called before reallocation is complete.
@@ -379,17 +391,16 @@ ctf_set_version (ctf_file_t * fp, ctf_header_t * cth, int ctf_version)
    Type kinds not checked here due to nonexistence in older formats:
       CTF_K_SLICE.  */
 static int
-upgrade_types (ctf_file_t *fp, ctf_header_t *cth)
+upgrade_types_v1 (ctf_file_t *fp, ctf_header_t *cth)
 {
   const ctf_type_v1_t *tbuf;
   const ctf_type_v1_t *tend;
-  unsigned char *ctf_base, *old_ctf_base = (unsigned char *) fp->ctf_base;
+  unsigned char *ctf_base, *old_ctf_base = (unsigned char *) fp->ctf_dynbase;
   ctf_type_t *t2buf;
 
   ssize_t increase = 0, size, increment, v2increment, vbytes, v2bytes;
   const ctf_type_v1_t *tp;
   ctf_type_t *t2p;
-  ctf_header_t *new_cth;
 
   tbuf = (ctf_type_v1_t *) (fp->ctf_buf + cth->cth_typeoff);
   tend = (ctf_type_v1_t *) (fp->ctf_buf + cth->cth_stroff);
@@ -425,35 +436,33 @@ upgrade_types (ctf_file_t *fp, ctf_header_t *cth)
       increase += v2bytes - vbytes;
     }
 
-  /* Allocate enough room for the new buffer, then copy everything but the
-     type section into place, and reset the base accordingly.  Leave the
-     version number unchanged, so that LCTF_INFO_* still works on the
+  /* Allocate enough room for the new buffer, then copy everything but the type
+     section into place, and reset the base accordingly.  Leave the version
+     number unchanged, so that LCTF_INFO_* still works on the
      as-yet-untranslated type info.  */
 
   if ((ctf_base = ctf_alloc (fp->ctf_size + increase)) == NULL)
     return ECTF_ZALLOC;
 
-  memcpy (ctf_base, fp->ctf_base, sizeof (ctf_header_t) + cth->cth_typeoff);
-  memcpy (ctf_base + sizeof (ctf_header_t) + cth->cth_stroff + increase,
-  fp->ctf_base + sizeof (ctf_header_t) + cth->cth_stroff,
-  cth->cth_strlen);
+  /* Start at ctf_buf, not ctf_base, to squeeze out the original header: we
+     never use it and it is unconverted.  */
 
-  memset (ctf_base + sizeof (ctf_header_t) + cth->cth_typeoff, 0,
- cth->cth_stroff - cth->cth_typeoff + increase);
+  memcpy (ctf_base, fp->ctf_buf, cth->cth_typeoff);
+  memcpy (ctf_base + cth->cth_stroff + increase,
+  fp->ctf_buf + cth->cth_stroff, cth->cth_strlen);
 
-  /* The cth here is an automatic variable in ctf_bufopen(), and transient
-     (a copy maintained because at that stage the header read out of the
-     ctf file may be read-only). We make all modifications in the
-     canonical copy at ctf_base (by now, writable), then copy it back into
-     cth at the end.  */
+  memset (ctf_base + cth->cth_typeoff, 0, cth->cth_stroff - cth->cth_typeoff
+  + increase);
 
-  new_cth = (ctf_header_t *) ctf_base;
-  new_cth->cth_stroff += increase;
+  cth->cth_stroff += increase;
   fp->ctf_size += increase;
-  assert (new_cth->cth_stroff >= new_cth->cth_typeoff);
-  ctf_set_base (fp, new_cth, ctf_base);
+  assert (cth->cth_stroff >= cth->cth_typeoff);
+  fp->ctf_base = ctf_base;
+  fp->ctf_buf = ctf_base;
+  fp->ctf_dynbase = ctf_base;
+  ctf_set_base (fp, cth, ctf_base);
 
-  t2buf = (ctf_type_t *) (fp->ctf_buf + new_cth->cth_typeoff);
+  t2buf = (ctf_type_t *) (fp->ctf_buf + cth->cth_typeoff);
 
   /* Iterate through all the types again, upgrading them.
 
@@ -596,15 +605,38 @@ upgrade_types (ctf_file_t *fp, ctf_header_t *cth)
      converting too much, or too little (leading to a buffer overrun either here
      or at read time, in init_types().) */
 
-  assert ((size_t) t2p - (size_t) fp->ctf_buf == new_cth->cth_stroff);
+  assert ((size_t) t2p - (size_t) fp->ctf_buf == cth->cth_stroff);
 
-  ctf_set_version (fp, (ctf_header_t *) ctf_base, CTF_VERSION_1_UPGRADED_3);
-  ctf_free_base (fp, old_ctf_base);
-  memcpy (cth, new_cth, sizeof (ctf_header_t));
+  ctf_set_version (fp, cth, CTF_VERSION_1_UPGRADED_3);
+  ctf_free (old_ctf_base);
 
   return 0;
 }
 
+/* Upgrade from any earlier version.  */
+static int
+upgrade_types (ctf_file_t *fp, ctf_header_t *cth)
+{
+  switch (cth->cth_version)
+    {
+      /* v1 requires a full pass and reformatting.  */
+    case CTF_VERSION_1:
+      upgrade_types_v1 (fp, cth);
+      /* FALLTHRU */
+      /* Already-converted v1 is just like later versions except that its
+ parent/child boundary is unchanged (and much lower).  */
+
+    case CTF_VERSION_1_UPGRADED_3:
+      fp->ctf_parmax = CTF_MAX_PTYPE_V1;
+
+      /* v2 is just the same as v3 except for new types and sections:
+ no upgrading required. */
+    case CTF_VERSION_2: ;
+      /* FALLTHRU */
+    }
+  return 0;
+}
+
 /* Initialize the type ID translation table with the byte offset of each type,
    and initialize the hash tables of each named type.  Upgrade the type table to
    the latest supported representation in the process, if needed, and if this
@@ -932,6 +964,7 @@ flip_header (ctf_header_t *cth)
   swap_thing (cth->cth_preamble.ctp_flags);
   swap_thing (cth->cth_parlabel);
   swap_thing (cth->cth_parname);
+  swap_thing (cth->cth_cuname);
   swap_thing (cth->cth_objtoff);
   swap_thing (cth->cth_funcoff);
   swap_thing (cth->cth_varoff);
@@ -1131,7 +1164,7 @@ flip_types (void *start, size_t len)
   return 0;
 }
 
-/* Flip the endianness of BASE, given the offsets in the (already endian-
+/* Flip the endianness of BUF, given the offsets in the (already endian-
    converted) CTH.
 
    All of this stuff happens before the header is fully initialized, so the
@@ -1139,15 +1172,13 @@ flip_types (void *start, size_t len)
    data, this is no real loss.  */
 
 static int
-flip_ctf (ctf_header_t *cth, unsigned char *base)
+flip_ctf (ctf_header_t *cth, unsigned char *buf)
 {
-  base += sizeof (ctf_header_t);
-
-  flip_lbls (base + cth->cth_lbloff, cth->cth_objtoff - cth->cth_lbloff);
-  flip_objts (base + cth->cth_objtoff, cth->cth_funcoff - cth->cth_objtoff);
-  flip_objts (base + cth->cth_funcoff, cth->cth_varoff - cth->cth_funcoff);
-  flip_vars (base + cth->cth_varoff, cth->cth_typeoff - cth->cth_varoff);
-  return flip_types (base + cth->cth_typeoff, cth->cth_stroff - cth->cth_typeoff);
+  flip_lbls (buf + cth->cth_lbloff, cth->cth_objtoff - cth->cth_lbloff);
+  flip_objts (buf + cth->cth_objtoff, cth->cth_funcoff - cth->cth_objtoff);
+  flip_objts (buf + cth->cth_funcoff, cth->cth_varoff - cth->cth_funcoff);
+  flip_vars (buf + cth->cth_varoff, cth->cth_typeoff - cth->cth_varoff);
+  return flip_types (buf + cth->cth_typeoff, cth->cth_stroff - cth->cth_typeoff);
 }
 
 /* Open a CTF file, mocking up a suitable ctf_sect.  */
@@ -1205,10 +1236,9 @@ ctf_bufopen (const ctf_sect_t *ctfsect, const ctf_sect_t *symsect,
      const ctf_sect_t *strsect, int *errp)
 {
   const ctf_preamble_t *pp;
-  ctf_header_t hp;
+  size_t hdrsz = sizeof (ctf_header_t);
+  ctf_header_t *hp;
   ctf_file_t *fp;
-  void *base;
-  size_t size, hdrsz;
   int foreign_endian = 0;
   int err;
 
@@ -1270,36 +1300,55 @@ ctf_bufopen (const ctf_sect_t *ctfsect, const ctf_sect_t *symsect,
       return (ctf_set_open_errno (errp, ECTF_NOTSUP));
     }
 
-  if (ctfsect->cts_size < sizeof (ctf_header_t))
+  if (pp->ctp_version < CTF_VERSION_3)
+    hdrsz = sizeof (ctf_header_v2_t);
+
+  if (ctfsect->cts_size < hdrsz)
     return (ctf_set_open_errno (errp, ECTF_NOCTFBUF));
 
-  memcpy (&hp, ctfsect->cts_data, sizeof (hp));
+  if ((fp = ctf_alloc (sizeof (ctf_file_t))) == NULL)
+    return (ctf_set_open_errno (errp, ENOMEM));
+
+  memset (fp, 0, sizeof (ctf_file_t));
+
+  if ((fp->ctf_header = ctf_alloc (sizeof (struct ctf_header))) == NULL)
+    {
+      ctf_free (fp);
+      return (ctf_set_open_errno (errp, ENOMEM));
+    }
+  hp = fp->ctf_header;
+  memcpy (hp, ctfsect->cts_data, hdrsz);
+  if (pp->ctp_version < CTF_VERSION_3)
+    upgrade_header (hp);
 
   if (foreign_endian)
-    flip_header (&hp);
+    flip_header (hp);
 
   ctf_dprintf ("header offsets: %x/%x/%x/%x/%x/%x/%x\n",
-       hp.cth_lbloff, hp.cth_objtoff, hp.cth_funcoff, hp.cth_varoff,
-       hp.cth_typeoff, hp.cth_stroff, hp.cth_strlen);
-  hdrsz = sizeof (ctf_header_t);
+       hp->cth_lbloff, hp->cth_objtoff, hp->cth_funcoff,
+       hp->cth_varoff, hp->cth_typeoff, hp->cth_stroff,
+               hp->cth_strlen);
 
-  size = hp.cth_stroff + hp.cth_strlen;
+  fp->ctf_size = hp->cth_stroff + hp->cth_strlen;
 
-  ctf_dprintf ("ctf_bufopen: uncompressed size=%lu\n", (unsigned long) size);
+  ctf_dprintf ("ctf_bufopen: uncompressed size=%lu\n",
+       (unsigned long) fp->ctf_size);
 
-  if (hp.cth_lbloff > size || hp.cth_objtoff > size
-      || hp.cth_funcoff > size || hp.cth_typeoff > size || hp.cth_stroff > size)
+  if (hp->cth_lbloff > fp->ctf_size || hp->cth_objtoff > fp->ctf_size
+      || hp->cth_funcoff > fp->ctf_size || hp->cth_typeoff > fp->ctf_size
+      || hp->cth_stroff > fp->ctf_size)
     return (ctf_set_open_errno (errp, ECTF_CORRUPT));
 
-  if (hp.cth_lbloff > hp.cth_objtoff
-      || hp.cth_objtoff > hp.cth_funcoff
-      || hp.cth_funcoff > hp.cth_typeoff
-      || hp.cth_funcoff > hp.cth_varoff
-      || hp.cth_varoff > hp.cth_typeoff || hp.cth_typeoff > hp.cth_stroff)
+  if (hp->cth_lbloff > hp->cth_objtoff
+      || hp->cth_objtoff > hp->cth_funcoff
+      || hp->cth_funcoff > hp->cth_typeoff
+      || hp->cth_funcoff > hp->cth_varoff
+      || hp->cth_varoff > hp->cth_typeoff || hp->cth_typeoff > hp->cth_stroff)
     return (ctf_set_open_errno (errp, ECTF_CORRUPT));
 
-  if ((hp.cth_lbloff & 3) || (hp.cth_objtoff & 1)
-      || (hp.cth_funcoff & 1) || (hp.cth_varoff & 3) || (hp.cth_typeoff & 3))
+  if ((hp->cth_lbloff & 3) || (hp->cth_objtoff & 2)
+      || (hp->cth_funcoff & 2) || (hp->cth_varoff & 3)
+      || (hp->cth_typeoff & 3))
     return (ctf_set_open_errno (errp, ECTF_CORRUPT));
 
   /* Once everything is determined to be valid, attempt to decompress the CTF
@@ -1310,76 +1359,78 @@ ctf_bufopen (const ctf_sect_t *ctfsect, const ctf_sect_t *symsect,
   /* Note: if this is a v1 buffer, it will be reallocated and expanded by
      init_types().  */
 
-  if (hp.cth_flags & CTF_F_COMPRESS)
+  if (hp->cth_flags & CTF_F_COMPRESS)
     {
       size_t srclen;
       uLongf dstlen;
       const void *src;
       int rc = Z_OK;
-      void *buf;
 
-      if ((base = ctf_alloc (size + hdrsz)) == NULL)
- return (ctf_set_open_errno (errp, ECTF_ZALLOC));
+      /* We are allocating this ourselves, so we can drop the ctf header
+ copy in favour of ctf->ctf_header.  */
 
-      memcpy (base, ctfsect->cts_data, hdrsz);
-      ((ctf_preamble_t *) base)->ctp_flags &= ~CTF_F_COMPRESS;
-      buf = (unsigned char *) base + hdrsz;
+      if ((fp->ctf_base = ctf_alloc (fp->ctf_size)) == NULL)
+ {
+  err = ECTF_ZALLOC;
+  goto bad;
+ }
+      fp->ctf_dynbase = fp->ctf_base;
+      hp->cth_flags &= ~CTF_F_COMPRESS;
 
       src = (unsigned char *) ctfsect->cts_data + hdrsz;
       srclen = ctfsect->cts_size - hdrsz;
-      dstlen = size;
+      dstlen = fp->ctf_size;
+      fp->ctf_buf = fp->ctf_base;
 
-      if ((rc = uncompress (buf, &dstlen, src, srclen)) != Z_OK)
+      if ((rc = uncompress (fp->ctf_base, &dstlen, src, srclen)) != Z_OK)
  {
   ctf_dprintf ("zlib inflate err: %s\n", zError (rc));
-  free (base);
-  return (ctf_set_open_errno (errp, ECTF_DECOMPRESS));
+  err = ECTF_DECOMPRESS;
+  goto bad;
  }
 
-      if ((size_t) dstlen != size)
+      if ((size_t) dstlen != fp->ctf_size)
  {
   ctf_dprintf ("zlib inflate short -- got %lu of %lu "
-       "bytes\n", (unsigned long) dstlen, (unsigned long) size);
-  free (base);
-  return (ctf_set_open_errno (errp, ECTF_CORRUPT));
+       "bytes\n", (unsigned long) dstlen,
+       (unsigned long) fp->ctf_size);
+  err = ECTF_CORRUPT;
+  goto bad;
  }
-
     }
   else if (foreign_endian)
     {
-      if ((base = ctf_alloc (size + hdrsz)) == NULL)
- return (ctf_set_open_errno (errp, ECTF_ZALLOC));
-      memcpy (base, ctfsect->cts_data, size + hdrsz);
+      if ((fp->ctf_base = ctf_alloc (fp->ctf_size)) == NULL)
+ {
+  err = ECTF_ZALLOC;
+  goto bad;
+ }
+      fp->ctf_dynbase = fp->ctf_base;
+      memcpy (fp->ctf_base, ((unsigned char *) ctfsect->cts_data) + hdrsz,
+      fp->ctf_size);
+      fp->ctf_buf = fp->ctf_base;
     }
   else
-    base = (void *) ctfsect->cts_data;
-
-  /* Flip the endianness of the copy of the header in the section, to avoid
-     ending up with a partially-endian-flipped file.  */
-
-  if (foreign_endian)
-    flip_header ((ctf_header_t *) base);
+    {
+      /* We are just using the section passed in -- but its header may be an old
+ version.  Point ctf_buf past the old header, and never touch it
+ again.  */
+      fp->ctf_base = (unsigned char *) ctfsect->cts_data;
+      fp->ctf_dynbase = NULL;
+      fp->ctf_buf = fp->ctf_base + hdrsz;
+    }
 
   /* Once we have uncompressed and validated the CTF data buffer, we can
-     proceed with allocating a ctf_file_t and initializing it.
+     proceed with initializing the ctf_file_t we allocated above.
 
      Nothing that depends on buf or base should be set directly in this function
      before the init_types() call, because it may be reallocated during
      transparent upgrade if this recension of libctf is so configured: see
-     ctf_set_base() and ctf_realloc_base().  */
+     ctf_set_base().  */
 
-  if ((fp = ctf_alloc (sizeof (ctf_file_t))) == NULL)
-    return (ctf_set_open_errno (errp, ENOMEM));
-
-  memset (fp, 0, sizeof (ctf_file_t));
-  ctf_set_version (fp, &hp, hp.cth_version);
+  ctf_set_version (fp, hp, hp->cth_version);
   ctf_str_create_atoms (fp);
-
-  if (_libctf_unlikely_ (hp.cth_version < CTF_VERSION_2))
-    fp->ctf_parmax = CTF_MAX_PTYPE_V1;
-  else
-    fp->ctf_parmax = CTF_MAX_PTYPE;
-
+  fp->ctf_parmax = CTF_MAX_PTYPE;
   memcpy (&fp->ctf_data, ctfsect, sizeof (ctf_sect_t));
 
   if (symsect != NULL)
@@ -1409,28 +1460,25 @@ ctf_bufopen (const ctf_sect_t *ctfsect, const ctf_sect_t *symsect,
     }
 
   if (foreign_endian &&
-      (err = flip_ctf (&hp, base)) != 0)
+      (err = flip_ctf (hp, fp->ctf_buf)) != 0)
     {
       /* We can be certain that flip_ctf() will have endian-flipped everything
          other than the types table when we return.  In particular the header
          is fine, so set it, to allow freeing to use the usual code path.  */
 
-      (void) ctf_set_open_errno (errp, err);
-      ctf_set_base (fp, &hp, base);
+      ctf_set_base (fp, hp, fp->ctf_base);
       goto bad;
     }
 
-  ctf_set_base (fp, &hp, base);
-  fp->ctf_size = size + hdrsz;
+  ctf_set_base (fp, hp, fp->ctf_base);
 
-  if ((err = init_types (fp, &hp)) != 0)
-    {
-      (void) ctf_set_open_errno (errp, err);
-      goto bad;
-    }
+  if ((err = init_types (fp, hp)) != 0)
+    goto bad;
 
   /* If we have a symbol table section, allocate and initialize
-     the symtab translation table, pointed to by ctf_sxlate.  */
+     the symtab translation table, pointed to by ctf_sxlate.  This table may be
+     too large for the actual size of the object and function info sections: if
+     so, ctf_nsyms will be adjusted and the excess will never be used.  */
 
   if (symsect != NULL)
     {
@@ -1439,15 +1487,12 @@ ctf_bufopen (const ctf_sect_t *ctfsect, const ctf_sect_t *symsect,
 
       if (fp->ctf_sxlate == NULL)
  {
-  (void) ctf_set_open_errno (errp, ENOMEM);
+  err = ENOMEM;
   goto bad;
  }
 
-      if ((err = init_symtab (fp, &hp, symsect, strsect)) != 0)
- {
-  (void) ctf_set_open_errno (errp, err);
-  goto bad;
- }
+      if ((err = init_symtab (fp, hp, symsect, strsect)) != 0)
+ goto bad;
     }
 
   /* Initialize the ctf_lookup_by_name top-level dictionary.  We keep an
@@ -1483,6 +1528,7 @@ ctf_bufopen (const ctf_sect_t *ctfsect, const ctf_sect_t *symsect,
   return fp;
 
 bad:
+  ctf_set_open_errno (errp, err);
   ctf_file_close (fp);
   return NULL;
 }
@@ -1509,11 +1555,9 @@ ctf_file_close (ctf_file_t *fp)
       return;
     }
 
-  if (fp->ctf_dynparname != NULL)
-    ctf_free (fp->ctf_dynparname);
-
-  if (fp->ctf_parent != NULL)
-    ctf_file_close (fp->ctf_parent);
+  ctf_free (fp->ctf_dyncuname);
+  ctf_free (fp->ctf_dynparname);
+  ctf_file_close (fp->ctf_parent);
 
   for (dtd = ctf_list_next (&fp->ctf_dtdefs); dtd != NULL; dtd = ntd)
     {
@@ -1530,40 +1574,32 @@ ctf_file_close (ctf_file_t *fp)
     }
   ctf_dynhash_destroy (fp->ctf_dvhash);
   ctf_str_free_atoms (fp);
-
   ctf_free (fp->ctf_tmp_typeslice);
 
-  if (fp->ctf_data.cts_name != _CTF_NULLSTR &&
-      fp->ctf_data.cts_name != NULL)
+  if (fp->ctf_data.cts_name != _CTF_NULLSTR)
     ctf_free ((char *) fp->ctf_data.cts_name);
 
-  if (fp->ctf_symtab.cts_name != _CTF_NULLSTR &&
-      fp->ctf_symtab.cts_name != NULL)
+  if (fp->ctf_symtab.cts_name != _CTF_NULLSTR)
     ctf_free ((char *) fp->ctf_symtab.cts_name);
 
-  if (fp->ctf_strtab.cts_name != _CTF_NULLSTR &&
-      fp->ctf_strtab.cts_name != NULL)
+  if (fp->ctf_strtab.cts_name != _CTF_NULLSTR)
     ctf_free ((char *) fp->ctf_strtab.cts_name);
 
   else if (fp->ctf_data_mmapped)
     ctf_munmap (fp->ctf_data_mmapped, fp->ctf_data_mmapped_len);
 
-  ctf_free_base (fp, NULL);
-
-  if (fp->ctf_sxlate != NULL)
-    ctf_free (fp->ctf_sxlate);
-
-  if (fp->ctf_txlate != NULL)
-    ctf_free (fp->ctf_txlate);
+  ctf_free (fp->ctf_dynbase);
 
-  if (fp->ctf_ptrtab != NULL)
-    ctf_free (fp->ctf_ptrtab);
+  ctf_free (fp->ctf_sxlate);
+  ctf_free (fp->ctf_txlate);
+  ctf_free (fp->ctf_ptrtab);
 
   ctf_hash_destroy (fp->ctf_structs);
   ctf_hash_destroy (fp->ctf_unions);
   ctf_hash_destroy (fp->ctf_enums);
   ctf_hash_destroy (fp->ctf_names);
 
+  ctf_free (fp->ctf_header);
   ctf_free (fp);
 }
 
@@ -1621,6 +1657,25 @@ ctf_parent_name_set (ctf_file_t *fp, const char *name)
   fp->ctf_parname = fp->ctf_dynparname;
 }
 
+/* Return the name of the compilation unit this CTF file applies to.  Usually
+   non-NULL only for non-parent containers.  */
+const char *
+ctf_cuname (ctf_file_t *fp)
+{
+  return fp->ctf_cuname;
+}
+
+/* Set the compilation unit name.  */
+void
+ctf_cuname_set (ctf_file_t *fp, const char *name)
+{
+  if (fp->ctf_dyncuname != NULL)
+    ctf_free (fp->ctf_dyncuname);
+
+  fp->ctf_dyncuname = ctf_strdup (name);
+  fp->ctf_cuname = fp->ctf_dyncuname;
+}
+
 /* Import the types from the specified parent container by storing a pointer
    to it in ctf_parent and incrementing its reference count.  Only one parent
    is allowed: if a parent already exists, it is replaced by the new parent.  */
--
2.22.0.238.g049a27acdc

Reply | Threaded
Open this post in threaded view
|

[PATCH 03/19] libctf, binutils: dump the CTF header

Nick Alcock
In reply to this post by Nick Alcock
Until now, the CTF header has been thrown away too soon to be dumped
using the ctf_dump() machinery used by objdump and readelf: instead, a
kludge involving debugging-priority dumps of the header offsets on every
open was used.

Replace this with proper first-class dumping machinery just like
everything else in the CTF file, and have objdump and readelf use it.
(The dumper already had an enum value in ctf_sect_names_t for this
purpose, waiting to be used.)

libctf/
        * ctf-impl.h (ctf_file_t): New field ctf_openflags.
        * ctf-open.c (ctf_bufopen): Set it.  No longer dump header offsets.
        * ctf-dump.c (ctf_dump_header): New function, dump the CTF header.
        (ctf_dump): Call it.
        (ctf_dump_header_strfield): New function.
        (ctf_dump_header_sectfield): Likewise.

binutils/
        * objdump.c (dump_ctf_archive_member): Dump the CTF header.
        * readelf.c (dump_section_as_ctf): Likewise.
---
 binutils/objdump.c |   7 +--
 binutils/readelf.c |   7 +--
 libctf/ctf-dump.c  | 129 ++++++++++++++++++++++++++++++++++++++++++++-
 libctf/ctf-impl.h  |   1 +
 libctf/ctf-open.c  |   6 +--
 5 files changed, 137 insertions(+), 13 deletions(-)

diff --git a/binutils/objdump.c b/binutils/objdump.c
index 6812ba7853..443dce806f 100644
--- a/binutils/objdump.c
+++ b/binutils/objdump.c
@@ -3255,8 +3255,9 @@ static int
 dump_ctf_archive_member (ctf_file_t *ctf, const char *name, void *arg)
 {
   ctf_file_t *parent = (ctf_file_t *) arg;
-  const char *things[] = {"Labels", "Data objects", "Function objects",
-  "Variables", "Types", "Strings", ""};
+  const char *things[] = {"Header", "Labels", "Data objects",
+                          "Function objects", "Variables", "Types", "Strings",
+                          ""};
   const char **thing;
   size_t i;
 
@@ -3267,7 +3268,7 @@ dump_ctf_archive_member (ctf_file_t *ctf, const char *name, void *arg)
     printf (_("\nCTF archive member: %s:\n"), sanitize_string (name));
 
   ctf_import (ctf, parent);
-  for (i = 1, thing = things; *thing[0]; thing++, i++)
+  for (i = 0, thing = things; *thing[0]; thing++, i++)
     {
       ctf_dump_state_t *s = NULL;
       char *item;
diff --git a/binutils/readelf.c b/binutils/readelf.c
index 5e8fe824b3..ea35aeef5c 100644
--- a/binutils/readelf.c
+++ b/binutils/readelf.c
@@ -13872,8 +13872,9 @@ dump_section_as_ctf (Elf_Internal_Shdr * section, Filedata * filedata)
   ctf_file_t *         ctf = NULL;
   ctf_file_t *         parent = NULL;
 
-  const char *things[] = {"Labels", "Data objects", "Function objects",
-  "Variables", "Types", "Strings", ""};
+  const char *things[] = {"Header", "Labels", "Data objects",
+                          "Function objects", "Variables", "Types", "Strings",
+                          ""};
   const char **thing;
   int err;
   bfd_boolean ret = FALSE;
@@ -13954,7 +13955,7 @@ dump_section_as_ctf (Elf_Internal_Shdr * section, Filedata * filedata)
   printf (_("\nDump of CTF section '%s':\n"),
   printable_section_name (filedata, section));
 
-  for (i = 1, thing = things; *thing[0]; thing++, i++)
+  for (i = 0, thing = things; *thing[0]; thing++, i++)
     {
       ctf_dump_state_t *s = NULL;
       char *item;
diff --git a/libctf/ctf-dump.c b/libctf/ctf-dump.c
index 0e8ab202dd..8c49a04afd 100644
--- a/libctf/ctf-dump.c
+++ b/libctf/ctf-dump.c
@@ -153,6 +153,132 @@ ctf_dump_format_type (ctf_file_t *fp, ctf_id_t id)
   return NULL;
 }
 
+/* Dump one string field from the file header into the cds_items.  */
+static int
+ctf_dump_header_strfield (ctf_file_t *fp, ctf_dump_state_t *state,
+  const char *name, uint32_t value)
+{
+  char *str;
+  if (value)
+    {
+      if (asprintf (&str, "%s: %s\n", name, ctf_strptr (fp, value)) < 0)
+ goto err;
+      ctf_dump_append (state, str);
+    }
+  return 0;
+
+ err:
+  return (ctf_set_errno (fp, -ENOMEM));
+}
+
+/* Dump one section-offset field from the file header into the cds_items.  */
+static int
+ctf_dump_header_sectfield (ctf_file_t *fp, ctf_dump_state_t *state,
+   const char *sect, uint32_t off, uint32_t nextoff)
+{
+  char *str;
+  if (nextoff - off)
+    {
+      if (asprintf (&str, "%s:\t0x%lx -- 0x%lx (0x%lx bytes)\n", sect,
+    (unsigned long) off, (unsigned long) (nextoff - 1),
+    (unsigned long) (nextoff - off)) < 0)
+ goto err;
+      ctf_dump_append (state, str);
+    }
+  return 0;
+
+ err:
+  return (ctf_set_errno (fp, -ENOMEM));
+}
+
+/* Dump the file header into the cds_items.  */
+static int
+ctf_dump_header (ctf_file_t *fp, ctf_dump_state_t *state)
+{
+  char *str;
+  const ctf_header_t *hp = fp->ctf_header;
+  const char *vertab[] =
+    {
+     NULL, "CTF_VERSION_1",
+     "CTF_VERSION_1_UPGRADED_3 (latest format, version 1 type "
+     "boundaries)",
+     "CTF_VERSION_2",
+     "CTF_VERSION_3", NULL
+    };
+  const char *verstr = NULL;
+
+  if (asprintf (&str, "Magic number: %x\n", hp->cth_magic) < 0)
+      goto err;
+  ctf_dump_append (state, str);
+
+  if (hp->cth_version <= CTF_VERSION)
+    verstr = vertab[hp->cth_version];
+
+  if (verstr == NULL)
+    verstr = "(not a valid version)";
+
+  if (asprintf (&str, "Version: %i (%s)\n", hp->cth_version,
+ verstr) < 0)
+    goto err;
+  ctf_dump_append (state, str);
+
+  /* Everything else is only printed if present.  */
+
+  /* The flags are unusual in that they represent the ctf_file_t *in memory*:
+     flags representing compression, etc, are turned off as the file is
+     decompressed.  So we store a copy of the flags before they are changed, for
+     the dumper.  */
+
+  if (fp->ctf_openflags > 0)
+    {
+      if (fp->ctf_openflags)
+ if (asprintf (&str, "Flags: 0x%x (%s)", fp->ctf_openflags,
+      fp->ctf_openflags & CTF_F_COMPRESS ? "CTF_F_COMPRESS"
+                                         : "") < 0)
+ goto err;
+      ctf_dump_append (state, str);
+    }
+
+  if (ctf_dump_header_strfield (fp, state, "Parent label",
+ hp->cth_parlabel) < 0)
+    goto err;
+
+  if (ctf_dump_header_strfield (fp, state, "Parent name", hp->cth_parname) < 0)
+    goto err;
+
+  if (ctf_dump_header_strfield (fp, state, "Compilation unit name",
+ hp->cth_cuname) < 0)
+    goto err;
+
+  if (ctf_dump_header_sectfield (fp, state, "Label section", hp->cth_lbloff,
+ hp->cth_objtoff) < 0)
+    goto err;
+
+  if (ctf_dump_header_sectfield (fp, state, "Data object section",
+ hp->cth_objtoff, hp->cth_funcoff) < 0)
+    goto err;
+
+  if (ctf_dump_header_sectfield (fp, state, "Function info section",
+ hp->cth_funcoff, hp->cth_varoff) < 0)
+    goto err;
+
+  if (ctf_dump_header_sectfield (fp, state, "Variable section",
+ hp->cth_varoff, hp->cth_typeoff) < 0)
+    goto err;
+
+  if (ctf_dump_header_sectfield (fp, state, "Type section",
+ hp->cth_typeoff, hp->cth_stroff) < 0)
+    goto err;
+
+  if (ctf_dump_header_sectfield (fp, state, "String section", hp->cth_stroff,
+ hp->cth_stroff + hp->cth_strlen + 1) < 0)
+    goto err;
+
+  return 0;
+ err:
+  return (ctf_set_errno (fp, -ENOMEM));
+}
+
 /* Dump a single label into the cds_items.  */
 
 static int
@@ -492,8 +618,7 @@ ctf_dump (ctf_file_t *fp, ctf_dump_state_t **statep, ctf_sect_names_t sect,
       switch (sect)
  {
  case CTF_SECT_HEADER:
-  /* Nothing doable (yet): entire header is discarded after read-phase.  */
-  str = strdup ("");
+  ctf_dump_header (fp, state);
   break;
  case CTF_SECT_LABEL:
   if (ctf_label_iter (fp, ctf_dump_label, state) < 0)
diff --git a/libctf/ctf-impl.h b/libctf/ctf-impl.h
index 1cfab431ca..5b331cbc6d 100644
--- a/libctf/ctf-impl.h
+++ b/libctf/ctf-impl.h
@@ -218,6 +218,7 @@ struct ctf_file
 {
   const ctf_fileops_t *ctf_fileops; /* Version-specific file operations.  */
   struct ctf_header *ctf_header;    /* The header from this CTF file.  */
+  unsigned char ctf_openflags;    /* Flags the file had when opened.  */
   ctf_sect_t ctf_data;    /* CTF data from object file.  */
   ctf_sect_t ctf_symtab;    /* Symbol table from object file.  */
   ctf_sect_t ctf_strtab;    /* String table from object file.  */
diff --git a/libctf/ctf-open.c b/libctf/ctf-open.c
index 46fb42e1e3..5d070acd3f 100644
--- a/libctf/ctf-open.c
+++ b/libctf/ctf-open.c
@@ -1323,11 +1323,7 @@ ctf_bufopen (const ctf_sect_t *ctfsect, const ctf_sect_t *symsect,
 
   if (foreign_endian)
     flip_header (hp);
-
-  ctf_dprintf ("header offsets: %x/%x/%x/%x/%x/%x/%x\n",
-       hp->cth_lbloff, hp->cth_objtoff, hp->cth_funcoff,
-       hp->cth_varoff, hp->cth_typeoff, hp->cth_stroff,
-               hp->cth_strlen);
+  fp->ctf_openflags = hp->cth_flags;
 
   fp->ctf_size = hp->cth_stroff + hp->cth_strlen;
 
--
2.22.0.238.g049a27acdc

Reply | Threaded
Open this post in threaded view
|

[PATCH 04/19 REVIEW] libctf, bfd: fix ctf_bfdopen_ctfsect opening symbol and string sections

Nick Alcock
In reply to this post by Nick Alcock
The code in ctf_bfdopen_ctfsect (which is the ultimate place where you
end up if you use ctf_open to open a CTF file and pull in the ELF string
and symbol tables) was written before it was possible to actually test
it, since the linker had not yet been written.  Now that it has, it turns out that the
previous code was completely nonfunctional: it assumed that you could
load the symbol table via bfd_section_from_elf_index (...,elf_onesymtab())
and the string table via bfd_section_from_elf_index on the sh_link.

Unfortunately BFD loads neither of these sections in the conventional
fashion it uses for most others: the symbol table is immediately
converted into internal form (which is useless for our purposes, since
we also have to work in the absence of BFD for readelf, etc) and the
string table is loaded specially via bfd_elf_get_str_section which is
private to bfd/elf.c.

So make this function public, export it in elf-bfd.h, and use it from
libctf, which does something similar to what bfd_elf_sym_name and
bfd_elf_string_from_elf_section do.  Similarly, load the symbol table
manually using bfd_elf_get_elf_syms and throw away the internal form
it generates for us (we never use it).

BFD allocates the strtab for us via bfd_alloc, so we can leave BFD to
deallocate it: we allocate the symbol table ourselves before calling
bfd_elf_get_elf_syms, so we still have to free it.

Also change the rules around what you are allowed to provide: It is
useful to provide a string section but no symbol table, because CTF
sections can legitimately have no function info or data object sections
while relying on the ELF strtab for some of their strings.  So allow
that combination.

bfd/
        * elf-bfd.h (bfd_elf_get_str_section): Add.
        * elf.c (bfd_elf_get_str_section): No longer static.

libctf/
        * ctf-open-bfd.c: Add <assert.h>.
        (ctf_bfdopen_ctfsect): Open string and symbol tables using
        techniques borrowed from bfd_elf_sym_name.
        (ctf_new_archive_internal): Improve comment.
        * ctf-archive.c (ctf_arc_close): Do not free the ctfi_strsect.
        * ctf-open.c (ctf_bufopen): Allow opening with a string section but
        no symbol section, but not vice versa.
---
 bfd/elf-bfd.h         |   1 +
 bfd/elf.c             |   2 +-
 libctf/ctf-archive.c  |   2 +-
 libctf/ctf-open-bfd.c | 106 ++++++++++++++++++++++++------------------
 libctf/ctf-open.c     |   2 +-
 5 files changed, 64 insertions(+), 49 deletions(-)

The bfd bits of this (de-staticking one function) need review, but I hope
they will be uncontroversial.

diff --git a/bfd/elf-bfd.h b/bfd/elf-bfd.h
index a6a831b206..00443e6089 100644
--- a/bfd/elf-bfd.h
+++ b/bfd/elf-bfd.h
@@ -2038,6 +2038,7 @@ extern char *bfd_elf_string_from_elf_section
 extern Elf_Internal_Sym *bfd_elf_get_elf_syms
   (bfd *, Elf_Internal_Shdr *, size_t, size_t, Elf_Internal_Sym *, void *,
    Elf_External_Sym_Shndx *);
+extern char * bfd_elf_get_str_section (bfd *, unsigned int);
 extern const char *bfd_elf_sym_name
   (bfd *, Elf_Internal_Shdr *, Elf_Internal_Sym *, asection *);
 
diff --git a/bfd/elf.c b/bfd/elf.c
index 265150d511..1c843327cf 100644
--- a/bfd/elf.c
+++ b/bfd/elf.c
@@ -275,7 +275,7 @@ bfd_elf_mkcorefile (bfd *abfd)
   return elf_tdata (abfd)->core != NULL;
 }
 
-static char *
+char *
 bfd_elf_get_str_section (bfd *abfd, unsigned int shindex)
 {
   Elf_Internal_Shdr **i_shdrp;
diff --git a/libctf/ctf-archive.c b/libctf/ctf-archive.c
index 5c1692219e..a13bac8cd6 100644
--- a/libctf/ctf-archive.c
+++ b/libctf/ctf-archive.c
@@ -405,7 +405,7 @@ ctf_arc_close (ctf_archive_t *arc)
   else
     ctf_file_close (arc->ctfi_file);
   free ((void *) arc->ctfi_symsect.cts_data);
-  free ((void *) arc->ctfi_strsect.cts_data);
+  /* Do not free the ctfi_strsect: it is bound to the bfd.  */
   free (arc->ctfi_data);
   free (arc);
 }
diff --git a/libctf/ctf-open-bfd.c b/libctf/ctf-open-bfd.c
index bb2d7e64de..97489789ce 100644
--- a/libctf/ctf-open-bfd.c
+++ b/libctf/ctf-open-bfd.c
@@ -19,6 +19,7 @@
 
 #include <ctf-impl.h>
 #include <stddef.h>
+#include <assert.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <errno.h>
@@ -32,8 +33,9 @@
 #include "elf-bfd.h"
 
 /* Make a new struct ctf_archive_internal wrapper for a ctf_archive or a
-   ctf_file.  Closes ARC and/or FP on error.  Arrange to free the SYMSECT and
-   STRSECT interior on close.  */
+   ctf_file.  Closes ARC and/or FP on error.  Arrange to free the SYMSECT or
+   STRSECT, as needed, on close (though the STRSECT interior is bound to the bfd
+   * and is not actually freed by this machinery).  */
 
 static struct ctf_archive_internal *
 ctf_new_archive_internal (int is_archive, struct ctf_archive *arc,
@@ -118,7 +120,7 @@ ctf_bfdopen (struct bfd *abfd, int *errp)
    later.  */
 
 ctf_archive_t *
-ctf_bfdopen_ctfsect (struct bfd *abfd _libctf_unused_,
+ctf_bfdopen_ctfsect (struct bfd *abfd,
      const ctf_sect_t *ctfsect, int *errp)
 {
   struct ctf_archive *arc = NULL;
@@ -130,50 +132,63 @@ ctf_bfdopen_ctfsect (struct bfd *abfd _libctf_unused_,
   int is_archive;
 
 #ifdef HAVE_BFD_ELF
-  asection *sym_asect;
   ctf_sect_t symsect, strsect;
+  Elf_Internal_Shdr *strhdr;
+  Elf_Internal_Shdr *symhdr = &elf_symtab_hdr (abfd);
+  size_t symcount = symhdr->sh_size / symhdr->sh_entsize;
+  Elf_Internal_Sym *isymbuf;
+  bfd_byte *symtab;
+  const char *strtab = NULL;
   /* TODO: handle SYMTAB_SHNDX.  */
 
-  if ((sym_asect = bfd_section_from_elf_index (abfd,
-       elf_onesymtab (abfd))) != NULL)
+  if ((symtab = malloc (symhdr->sh_size)) == NULL)
     {
-      Elf_Internal_Shdr *symhdr = &elf_symtab_hdr (abfd);
-      asection *str_asect = NULL;
-      bfd_byte *contents;
+      bfderrstr = "Cannot malloc symbol table";
+      goto err;
+    }
 
-      if (symhdr->sh_link != SHN_UNDEF &&
-  symhdr->sh_link <= elf_numsections (abfd))
- str_asect = bfd_section_from_elf_index (abfd, symhdr->sh_link);
+  isymbuf = bfd_elf_get_elf_syms (abfd, symhdr, symcount, 0,
+                                  NULL, symtab, NULL);
+  free (isymbuf);
+  if (isymbuf == NULL)
+    {
+      bfderrstr = "Cannot read symbol table";
+      goto err_free_sym;
+    }
 
-      Elf_Internal_Shdr *strhdr = elf_elfsections (abfd)[symhdr->sh_link];
+  if (elf_elfsections (abfd) != NULL
+      && symhdr->sh_link < elf_numsections (abfd))
+    {
+      strhdr = elf_elfsections (abfd)[symhdr->sh_link];
+      if (strhdr->contents == NULL)
+        {
+          if ((strtab = bfd_elf_get_str_section (abfd, symhdr->sh_link)) == NULL)
+            {
+              bfderrstr = "Cannot read string table";
+              goto err_free_sym;
+            }
+        }
+      else
+        strtab = (const char *) strhdr->contents;
+    }
 
-      if (sym_asect && str_asect)
- {
-  if (!bfd_malloc_and_get_section (abfd, str_asect, &contents))
-    {
-      bfderrstr = "Cannot malloc string table";
-      free (contents);
-      goto err;
-    }
-  strsect.cts_data = contents;
-  strsect.cts_name = (char *) strsect.cts_data + strhdr->sh_name;
-  strsect.cts_size = bfd_section_size (abfd, str_asect);
-  strsect.cts_entsize = strhdr->sh_size;
-  strsectp = &strsect;
-
-  if (!bfd_malloc_and_get_section (abfd, sym_asect, &contents))
-    {
-      bfderrstr = "Cannot malloc symbol table";
-      free (contents);
-      goto err_free_str;
-    }
-
-  symsect.cts_name = (char *) strsect.cts_data + symhdr->sh_name;
-  symsect.cts_entsize = symhdr->sh_size;
-  symsect.cts_size = bfd_section_size (abfd, sym_asect);
-  symsect.cts_data = contents;
-  symsectp = &symsect;
- }
+  if (strtab)
+    {
+      /* The names here are more or less arbitrary, but there is no point
+         thrashing around digging the name out of the shstrtab given that we don't
+         use it for anything but debugging.  */
+
+      strsect.cts_data = strtab;
+      strsect.cts_name = ".strtab";
+      strsect.cts_size = strhdr->sh_size;
+      strsectp = &strsect;
+
+      assert (symhdr->sh_entsize == get_elf_backend_data (abfd)->s->sizeof_sym);
+      symsect.cts_name = ".symtab";
+      symsect.cts_entsize = symhdr->sh_entsize;
+      symsect.cts_size = symhdr->sh_size;
+      symsect.cts_data = symtab;
+      symsectp = &symsect;
     }
 #endif
 
@@ -183,7 +198,7 @@ ctf_bfdopen_ctfsect (struct bfd *abfd _libctf_unused_,
       is_archive = 1;
       if ((arc = ctf_arc_bufopen ((void *) ctfsect->cts_data,
   ctfsect->cts_size, errp)) == NULL)
- goto err_free_sym;
+ goto err_free_str;
     }
   else
     {
@@ -192,7 +207,7 @@ ctf_bfdopen_ctfsect (struct bfd *abfd _libctf_unused_,
  {
   ctf_dprintf ("ctf_internal_open(): cannot open CTF: %s\n",
        ctf_errmsg (*errp));
-  goto err_free_sym;
+  goto err_free_str;
  }
     }
   arci = ctf_new_archive_internal (is_archive, arc, fp, symsectp, strsectp,
@@ -200,11 +215,10 @@ ctf_bfdopen_ctfsect (struct bfd *abfd _libctf_unused_,
 
   if (arci)
     return arci;
- err_free_sym:
+ err_free_str: ;
 #ifdef HAVE_BFD_ELF
-  free ((void *) symsect.cts_data);
-err_free_str:
-  free ((void *) strsect.cts_data);
+ err_free_sym:
+  free (symtab);
 #endif
 err: _libctf_unused_;
   if (bfderrstr)
diff --git a/libctf/ctf-open.c b/libctf/ctf-open.c
index 5d070acd3f..61c5c88062 100644
--- a/libctf/ctf-open.c
+++ b/libctf/ctf-open.c
@@ -1244,7 +1244,7 @@ ctf_bufopen (const ctf_sect_t *ctfsect, const ctf_sect_t *symsect,
 
   libctf_init_debug();
 
-  if (ctfsect == NULL || ((symsect == NULL) != (strsect == NULL)))
+  if ((ctfsect == NULL) || ((symsect != NULL) && (strsect == NULL)))
     return (ctf_set_open_errno (errp, EINVAL));
 
   if (symsect != NULL && symsect->cts_entsize != sizeof (Elf32_Sym) &&
--
2.22.0.238.g049a27acdc

Reply | Threaded
Open this post in threaded view
|

[PATCH 05/19] libctf: add the object index and function index sections

Nick Alcock
In reply to this post by Nick Alcock
No code handles these yet, but our latest GCC patches are generating
them, so we have to be ready for them or erroneously conclude that we
have file corruption.

(This simultaneously fixes a longstanding bug, concealed because nothing
was generating anything in the object or function info sections, where
the end of the section was being tested against the wrong thing: it
would have walked over the entire contents of the variable section and
treated them as part of the function info section.  This had to change
now anyway because the new sections have landed in between.)

include/
        * ctf.h: Add object index and function index sections.  Describe
        them. Improve the description of the variable section and clarify
        the constraints on backward-pointing type nodes.
        (ctf_header): Add cth_objtidxoff, cth_funcidxoff.

libctf/
        * ctf-open.c (init_symtab): Check for overflow against the right
        section.
        (upgrade_header): Set cth_objtidxoff, cth_funcidxoff to zero-length.
        (upgrade_types_v1): Note that these sections are not checked.
        (flip_header): Endian-swap the header fields.
        (flip_ctf): Endian-swap the sections.
        (flip_objts): Update comment.
        (ctf_bufopen): Check header offsets and alignment for validity.
---
 include/ctf.h     | 32 ++++++++++++++++++++++++++------
 libctf/ctf-open.c | 31 ++++++++++++++++++++++---------
 2 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/include/ctf.h b/include/ctf.h
index 7e00005d27..f371cd73c9 100644
--- a/include/ctf.h
+++ b/include/ctf.h
@@ -52,10 +52,15 @@ extern "C"
 
    The CTF file or section itself has the following structure:
 
-   +--------+--------+---------+----------+----------+-------+--------+
-   |  file  |  type  |  data   | function | variable | data  | string |
-   | header | labels | objects |   info   |   info   | types | table  |
-   +--------+--------+---------+----------+----------+-------+--------+
+   +--------+--------+---------+----------+--------+----------+...
+   |  file  |  type  |  data   | function | object | function |...
+   | header | labels | objects |   info   | index  |  index   |...
+   +--------+--------+---------+----------+--------+----------+...
+
+   ...+----------+-------+--------+
+   ...| variable | data  | string |
+   ...|   info   | types | table  |
+      +----------+-------+--------+
 
    The file header stores a magic number and version information, encoding
    flags, and the byte offset of each of the sections relative to the end of the
@@ -74,14 +79,27 @@ extern "C"
    For each data object, the type ID (a small integer) is recorded.  For each
    function, the type ID of the return type and argument types is recorded.
 
+   For situations in which the order of the symbols in the symtab is not known,
+   a pair of optional indexes follow the data object and function info sections:
+   each of these is an array of strtab indexes, mapped 1:1 to the corresponding
+   data object / function info section, giving each entry in those sections a
+   name so that the linker can correlate them with final symtab entries and
+   reorder them accordingly (dropping the indexes in the process).
+
    Variable records (as distinct from data objects) provide a modicum of support
    for non-ELF systems, mapping a variable name to a CTF type ID.  The variable
-   names are sorted into ASCIIbetical order, permitting binary searching.
+   names are sorted into ASCIIbetical order, permitting binary searching.  We do
+   not define how the consumer maps these variable names to addresses or
+   anything else, or indeed what these names represent: they might be names
+   looked up at runtime via dlsym() or names extracted at runtime by a debugger
+   or anything else the consumer likes.
 
    The data types section is a list of variable size records that represent each
    type, in order by their ID.  The types themselves form a directed graph,
    where each node may contain one or more outgoing edges to other type nodes,
-   denoted by their ID.
+   denoted by their ID.  Most type nodes are standalone or point backwards to
+   earlier nodes, but this is not required: nodes can point to later nodes,
+   particularly structure and union members.
 
    Strings are recorded as a string table ID (0 or 1) and a byte offset into the
    string table.  String table 0 is the internal CTF string table.  String table
@@ -149,6 +167,8 @@ typedef struct ctf_header
   uint32_t cth_lbloff; /* Offset of label section.  */
   uint32_t cth_objtoff; /* Offset of object section.  */
   uint32_t cth_funcoff; /* Offset of function section.  */
+  uint32_t cth_objtidxoff; /* Offset of object index section.  */
+  uint32_t cth_funcidxoff; /* Offset of function index section.  */
   uint32_t cth_varoff; /* Offset of variable section.  */
   uint32_t cth_typeoff; /* Offset of type section.  */
   uint32_t cth_stroff; /* Offset of string section.  */
diff --git a/libctf/ctf-open.c b/libctf/ctf-open.c
index 61c5c88062..ee09ca123b 100644
--- a/libctf/ctf-open.c
+++ b/libctf/ctf-open.c
@@ -278,7 +278,7 @@ init_symtab (ctf_file_t *fp, const ctf_header_t *hp,
   break;
 
  case STT_FUNC:
-  if (funcoff >= hp->cth_typeoff)
+  if (funcoff >= hp->cth_objtidxoff)
     {
       *xp = -1u;
       break;
@@ -376,6 +376,8 @@ upgrade_header (ctf_header_t *hp)
   hp->cth_stroff = oldhp->cth_stroff;
   hp->cth_typeoff = oldhp->cth_typeoff;
   hp->cth_varoff = oldhp->cth_varoff;
+  hp->cth_funcidxoff = hp->cth_varoff; /* No index sections.  */
+  hp->cth_objtidxoff = hp->cth_funcidxoff;
   hp->cth_funcoff = oldhp->cth_funcoff;
   hp->cth_objtoff = oldhp->cth_objtoff;
   hp->cth_lbloff = oldhp->cth_lbloff;
@@ -388,6 +390,9 @@ upgrade_header (ctf_header_t *hp)
    The upgrade is not done in-place: the ctf_base is moved.  ctf_strptr() must
    not be called before reallocation is complete.
 
+   Sections not checked here due to nonexistence or nonpopulated state in older
+   formats: objtidx, funcidx.
+
    Type kinds not checked here due to nonexistence in older formats:
       CTF_K_SLICE.  */
 static int
@@ -967,6 +972,8 @@ flip_header (ctf_header_t *cth)
   swap_thing (cth->cth_cuname);
   swap_thing (cth->cth_objtoff);
   swap_thing (cth->cth_funcoff);
+  swap_thing (cth->cth_objtidxoff);
+  swap_thing (cth->cth_funcidxoff);
   swap_thing (cth->cth_varoff);
   swap_thing (cth->cth_typeoff);
   swap_thing (cth->cth_stroff);
@@ -987,10 +994,10 @@ flip_lbls (void *start, size_t len)
     }
 }
 
-/* Flip the endianness of the data-object or function sections, an array of
-   uint32_t.  (The function section has more internal structure, but that
-   structure is an array of uint32_t, so can be treated as one big array for
-   byte-swapping.)  */
+/* Flip the endianness of the data-object or function sections or their indexes,
+   all arrays of uint32_t.  (The function section has more internal structure,
+   but that structure is an array of uint32_t, so can be treated as one big
+   array for byte-swapping.)  */
 
 static void
 flip_objts (void *start, size_t len)
@@ -1176,7 +1183,9 @@ flip_ctf (ctf_header_t *cth, unsigned char *buf)
 {
   flip_lbls (buf + cth->cth_lbloff, cth->cth_objtoff - cth->cth_lbloff);
   flip_objts (buf + cth->cth_objtoff, cth->cth_funcoff - cth->cth_objtoff);
-  flip_objts (buf + cth->cth_funcoff, cth->cth_varoff - cth->cth_funcoff);
+  flip_objts (buf + cth->cth_funcoff, cth->cth_objtidxoff - cth->cth_funcoff);
+  flip_objts (buf + cth->cth_objtidxoff, cth->cth_funcidxoff - cth->cth_objtidxoff);
+  flip_objts (buf + cth->cth_funcidxoff, cth->cth_varoff - cth->cth_funcidxoff);
   flip_vars (buf + cth->cth_varoff, cth->cth_typeoff - cth->cth_varoff);
   return flip_types (buf + cth->cth_typeoff, cth->cth_stroff - cth->cth_typeoff);
 }
@@ -1331,19 +1340,23 @@ ctf_bufopen (const ctf_sect_t *ctfsect, const ctf_sect_t *symsect,
        (unsigned long) fp->ctf_size);
 
   if (hp->cth_lbloff > fp->ctf_size || hp->cth_objtoff > fp->ctf_size
-      || hp->cth_funcoff > fp->ctf_size || hp->cth_typeoff > fp->ctf_size
+      || hp->cth_funcoff > fp->ctf_size || hp->cth_objtidxoff > fp->ctf_size
+      || hp->cth_funcidxoff > fp->ctf_size || hp->cth_typeoff > fp->ctf_size
       || hp->cth_stroff > fp->ctf_size)
     return (ctf_set_open_errno (errp, ECTF_CORRUPT));
 
   if (hp->cth_lbloff > hp->cth_objtoff
       || hp->cth_objtoff > hp->cth_funcoff
       || hp->cth_funcoff > hp->cth_typeoff
-      || hp->cth_funcoff > hp->cth_varoff
+      || hp->cth_funcoff > hp->cth_objtidxoff
+      || hp->cth_objtidxoff > hp->cth_funcidxoff
+      || hp->cth_funcidxoff > hp->cth_varoff
       || hp->cth_varoff > hp->cth_typeoff || hp->cth_typeoff > hp->cth_stroff)
     return (ctf_set_open_errno (errp, ECTF_CORRUPT));
 
   if ((hp->cth_lbloff & 3) || (hp->cth_objtoff & 2)
-      || (hp->cth_funcoff & 2) || (hp->cth_varoff & 3)
+      || (hp->cth_funcoff & 2) || (hp->cth_objtidxoff & 2)
+      || (hp->cth_funcidxoff & 2) || (hp->cth_varoff & 3)
       || (hp->cth_typeoff & 3))
     return (ctf_set_open_errno (errp, ECTF_CORRUPT));
 
--
2.22.0.238.g049a27acdc

Reply | Threaded
Open this post in threaded view
|

[PATCH 06/19 REVIEW] binutils: readelf: when dumping CTF, load strtab and symtab automatically

Nick Alcock
In reply to this post by Nick Alcock
We were only loading them when explicitly requested, which leads to
strings that point off into empty space (into the non-loaded "external"
ELF string table).  Avoid this unfortunate consequence by loading the
strtab and symtab by default, unless a blank name is given.

binutils/
        * readelf.c (dump_ctf_symtab_name): Give default value.
        (dump_ctf_strtab_name): Likewise.
        (dump_section_as_ctf): Allow for the null string.
---
 binutils/doc/binutils.texi |  3 ++-
 binutils/readelf.c         | 10 ++++++++--
 2 files changed, 10 insertions(+), 3 deletions(-)

Technically needs review because it's not in libctf.

diff --git a/binutils/doc/binutils.texi b/binutils/doc/binutils.texi
index f3c4eaea4f..8e531b2306 100644
--- a/binutils/doc/binutils.texi
+++ b/binutils/doc/binutils.texi
@@ -4830,7 +4830,8 @@ command to @command{ar}, but without using the BFD library.  @xref{ar}.
 @item --ctf-symbols=@var{section}
 @item --ctf-strings=@var{section}
 Specify the name of another section from which the CTF file can inherit
-strings and symbols.
+strings and symbols.  By default, the @code{.symtab} and its linked
+string table are used.
 
 If either of @option{--ctf-symbols} or @option{--ctf-strings} is specified, the
 other must be specified as well.
diff --git a/binutils/readelf.c b/binutils/readelf.c
index ea35aeef5c..044fd80186 100644
--- a/binutils/readelf.c
+++ b/binutils/readelf.c
@@ -13884,7 +13884,13 @@ dump_section_as_ctf (Elf_Internal_Shdr * section, Filedata * filedata)
   data = get_section_contents (section, filedata);
   ctfsect.cts_data = data;
 
-  if (dump_ctf_symtab_name)
+  if (!dump_ctf_symtab_name)
+    dump_ctf_symtab_name = strdup (".symtab");
+
+  if (!dump_ctf_strtab_name)
+    dump_ctf_strtab_name = strdup (".strtab");
+
+  if (dump_ctf_symtab_name && dump_ctf_symtab_name[0] != 0)
     {
       if ((symtab_sec = find_section (filedata, dump_ctf_symtab_name)) == NULL)
  {
@@ -13899,7 +13905,7 @@ dump_section_as_ctf (Elf_Internal_Shdr * section, Filedata * filedata)
       symsectp = shdr_to_ctf_sect (&symsect, symtab_sec, filedata);
       symsect.cts_data = symdata;
     }
-  if (dump_ctf_strtab_name)
+  if (dump_ctf_strtab_name && dump_ctf_strtab_name[0] != 0)
     {
       if ((strtab_sec = find_section (filedata, dump_ctf_strtab_name)) == NULL)
  {
--
2.22.0.238.g049a27acdc

Reply | Threaded
Open this post in threaded view
|

[PATCH 07/19 REVIEW] binutils: objdump does not take --ctf-symbols or --ctf-strings options

Nick Alcock
In reply to this post by Nick Alcock
libctf figures out what to load itself, with no overriding currently
possible, so remove the documentation of these nonexistent options.
---
 binutils/doc/binutils.texi | 2 --
 1 file changed, 2 deletions(-)

Technically needs review.

diff --git a/binutils/doc/binutils.texi b/binutils/doc/binutils.texi
index 8e531b2306..f27afa86cc 100644
--- a/binutils/doc/binutils.texi
+++ b/binutils/doc/binutils.texi
@@ -2124,8 +2124,6 @@ objdump [@option{-a}|@option{--archive-headers}]
         [@option{--dwarf-depth=@var{n}}]
         [@option{--dwarf-start=@var{n}}]
         [@option{--ctf-parent=}@var{section}]
-        [@option{--ctf-symbols=}@var{section}]
-        [@option{--ctf-strings=}@var{section}]
         [@option{--no-recurse-limit}|@option{--recurse-limit}]
         [@option{--special-syms}]
         [@option{--prefix=}@var{prefix}]
--
2.22.0.238.g049a27acdc

Reply | Threaded
Open this post in threaded view
|

[PATCH 08/19] libctf: Add iteration over non-root types

Nick Alcock
In reply to this post by Nick Alcock
The existing function ctf_type_iter lets you iterate over root-visible
types (types you can look up by name).  There is no way to iterate over
non-root-visible types, which is troublesome because both the linker
and dumper want to do that.

So add a new function that can do it: the callback it takes accepts
an extra parameter which indicates whether the type is root-visible
or not.

include/
        * ctf-api.h (ctf_type_all_f): New.
        (ctf_type_iter_all): New.

libctf/
        * ctf-types.c (ctf_type_iter_all): New.
---
 include/ctf-api.h  |  2 ++
 libctf/ctf-types.c | 21 +++++++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/include/ctf-api.h b/include/ctf-api.h
index 42c3c9a319..ba3cc34cad 100644
--- a/include/ctf-api.h
+++ b/include/ctf-api.h
@@ -211,6 +211,7 @@ typedef int ctf_member_f (const char *name, ctf_id_t membtype,
 typedef int ctf_enum_f (const char *name, int val, void *arg);
 typedef int ctf_variable_f (const char *name, ctf_id_t type, void *arg);
 typedef int ctf_type_f (ctf_id_t type, void *arg);
+typedef int ctf_type_all_f (ctf_id_t type, int flag, void *arg);
 typedef int ctf_label_f (const char *name, const ctf_lblinfo_t *info,
  void *arg);
 typedef int ctf_archive_member_f (ctf_file_t *fp, const char *name, void *arg);
@@ -314,6 +315,7 @@ extern int ctf_label_info (ctf_file_t *, const char *, ctf_lblinfo_t *);
 extern int ctf_member_iter (ctf_file_t *, ctf_id_t, ctf_member_f *, void *);
 extern int ctf_enum_iter (ctf_file_t *, ctf_id_t, ctf_enum_f *, void *);
 extern int ctf_type_iter (ctf_file_t *, ctf_type_f *, void *);
+extern int ctf_type_iter_all (ctf_file_t *, ctf_type_all_f *, void *);
 extern int ctf_label_iter (ctf_file_t *, ctf_label_f *, void *);
 extern int ctf_variable_iter (ctf_file_t *, ctf_variable_f *, void *);
 extern int ctf_archive_iter (const ctf_archive_t *, ctf_archive_member_f *,
diff --git a/libctf/ctf-types.c b/libctf/ctf-types.c
index dc158e2f52..b53835676a 100644
--- a/libctf/ctf-types.c
+++ b/libctf/ctf-types.c
@@ -144,6 +144,27 @@ ctf_type_iter (ctf_file_t *fp, ctf_type_f *func, void *arg)
   return 0;
 }
 
+/* Call FUNC with the ID and CTF_ADD_ROOT/CTF_ADD_NONROOT flag of every type
+   in FP, root-visible or not, stopping early if FUNC returns nonzero.  */
+
+int
+ctf_type_iter_all (ctf_file_t *fp, ctf_type_all_f *func, void *arg)
+{
+  ctf_id_t id, max = fp->ctf_typemax;
+  int rc, child = (fp->ctf_flags & LCTF_CHILD);
+
+  for (id = 1; id <= max; id++)
+    {
+      const ctf_type_t *tp = LCTF_INDEX_TO_TYPEPTR (fp, id);
+      if ((rc = func (LCTF_INDEX_TO_TYPE (fp, id, child),
+                      LCTF_INFO_ISROOT (fp, tp->ctt_info)
+                      ? CTF_ADD_ROOT : CTF_ADD_NONROOT, arg)) != 0)
+        return rc;
+    }
+
+  return 0;
+}
+
 /* Iterate over every variable in the given CTF container, in arbitrary order.
    We pass the name of each variable to the specified callback function.  */
 
--
2.22.0.238.g049a27acdc

Reply | Threaded
Open this post in threaded view
|

[PATCH 09/19] libctf: support getting strings from the ELF strtab

Nick Alcock
In reply to this post by Nick Alcock
The CTF file format has always supported "external strtabs", which
internally are strtab offsets with their MSB on: such refs
get their strings from the strtab passed in at CTF file open time:
this is usually intended to be the ELF strtab, and that's what this
implementation is meant to support, though in theory the external
strtab could come from anywhere.

This commit adds support for these external strings in the ctf-string.c
strtab tracking layer.  It's quite easy: we just add a field csa_offset
to the atoms table that tracks all strings.  This field holds the offset
of the string in the ELF strtab (with its MSB already on, courtesy of
a new macro CTF_SET_STID).  We also add a new function that sets the
csa_offset to the specified offset (plus MSB).  Then we just need to
avoid writing strings that have csa_offset set out to the internal
strtab, and account for the internal strtab being shorter than it
would otherwise be.

(We could in theory save a little more time here by eschewing sorting
such strings, since we never actually write the strings out anywhere,
but that would mean storing them separately and it's just not worth the
complexity cost until profiling shows it's worth doing.)

include/
        * ctf.h (CTF_SET_STID): New.

libctf/
        * ctf-impl.h (struct ctf_str_atom): New field csa_offset.
        (ctf_str_add_ref): Name the last arg.
        (ctf_str_add_external): New.
        * ctf-string.c (ctf_str_add_ref_internal): Return the atom, not the
        string.
        (ctf_str_add): Adjust accordingly.
        (ctf_str_add_ref): Likewise.  Move up in the file.
        (ctf_str_add_external): New: update the csa_offset.
        (ctf_str_count_strtab): Only account for strings with no csa_offset
        in the internal strtab length.
        (ctf_str_write_strtab): If the csa_offset is set, update the
        string's refs without writing the string out.
---
 include/ctf.h       |  1 +
 libctf/ctf-impl.h   |  4 +-
 libctf/ctf-string.c | 90 ++++++++++++++++++++++++++++++++-------------
 3 files changed, 68 insertions(+), 27 deletions(-)

diff --git a/include/ctf.h b/include/ctf.h
index f371cd73c9..ff3204b9aa 100644
--- a/include/ctf.h
+++ b/include/ctf.h
@@ -353,6 +353,7 @@ union
 
 #define CTF_NAME_STID(name) ((name) >> 31)
 #define CTF_NAME_OFFSET(name) ((name) & CTF_MAX_NAME)
+#define CTF_SET_STID(name, stid) ((name) | ((uint32_t) (stid) << 31))
 
 /* V2 only. */
 #define CTF_TYPE_INFO(kind, isroot, vlen) \
diff --git a/libctf/ctf-impl.h b/libctf/ctf-impl.h
index 5b331cbc6d..1c243d758c 100644
--- a/libctf/ctf-impl.h
+++ b/libctf/ctf-impl.h
@@ -192,6 +192,7 @@ typedef struct ctf_str_atom
 {
   const char *csa_str; /* Backpointer to string (hash key).  */
   ctf_list_t csa_refs; /* This string's refs.  */
+  uint32_t csa_offset; /* External strtab offset, if any.  */
   unsigned long csa_snapshot_id; /* Snapshot ID at time of creation.  */
 } ctf_str_atom_t;
 
@@ -380,7 +381,8 @@ extern const char *ctf_strptr (ctf_file_t *, uint32_t);
 extern int ctf_str_create_atoms (ctf_file_t *);
 extern void ctf_str_free_atoms (ctf_file_t *);
 extern const char *ctf_str_add (ctf_file_t *, const char *);
-extern const char *ctf_str_add_ref (ctf_file_t *, const char *, uint32_t *);
+extern const char *ctf_str_add_ref (ctf_file_t *, const char *, uint32_t *ref);
+extern const char *ctf_str_add_external (ctf_file_t *, const char *, uint32_t offset);
 extern void ctf_str_rollback (ctf_file_t *, ctf_snapshot_id_t);
 extern void ctf_str_purge_refs (ctf_file_t *);
 extern ctf_strs_writable_t ctf_str_write_strtab (ctf_file_t *);
diff --git a/libctf/ctf-string.c b/libctf/ctf-string.c
index 27bd7c2bba..4d063bb5e6 100644
--- a/libctf/ctf-string.c
+++ b/libctf/ctf-string.c
@@ -88,11 +88,11 @@ ctf_str_free_atoms (ctf_file_t *fp)
   ctf_dynhash_destroy (fp->ctf_str_atoms);
 }
 
-/* Add a string to the atoms table and return it, or return an existing string
-   if present, copying the passed-in string.  Returns NULL only when out of
-   memory (and do not touch the passed-in string in that case).  Possibly
-   augment the ref list with the passed-in ref.  */
-static const char *
+/* Add a string to the atoms table, copying the passed-in string.  Return the
+   atom added. Return NULL only when out of memory (and do not touch the
+   passed-in string in that case).  Possibly augment the ref list with the
+   passed-in ref.  */
+static ctf_str_atom_t *
 ctf_str_add_ref_internal (ctf_file_t *fp, const char *str,
   int add_ref, uint32_t *ref)
 {
@@ -116,7 +116,7 @@ ctf_str_add_ref_internal (ctf_file_t *fp, const char *str,
   ctf_list_append (&atom->csa_refs, aref);
   fp->ctf_str_num_refs++;
  }
-      return atom->csa_str;
+      return atom;
     }
 
   if ((atom = ctf_alloc (sizeof (struct ctf_str_atom))) == NULL)
@@ -136,7 +136,7 @@ ctf_str_add_ref_internal (ctf_file_t *fp, const char *str,
       ctf_list_append (&atom->csa_refs, aref);
       fp->ctf_str_num_refs++;
     }
-  return newstr;
+  return atom;
 
  oom:
   ctf_free (atom);
@@ -150,9 +150,48 @@ ctf_str_add_ref_internal (ctf_file_t *fp, const char *str,
 const char *
 ctf_str_add (ctf_file_t *fp, const char *str)
 {
-  if (str)
-    return ctf_str_add_ref_internal (fp, str, FALSE, 0);
-  return NULL;
+  ctf_str_atom_t *atom;
+  if (!str)
+    return NULL;
+
+  atom = ctf_str_add_ref_internal (fp, str, FALSE, 0);
+  if (!atom)
+    return NULL;
+
+  return atom->csa_str;
+}
+
+/* Like ctf_str_add(), but additionally augment the atom's refs list with the
+   passed-in ref, whether or not the string is already present.  There is no
+   attempt to deduplicate the refs list (but duplicates are harmless).  */
+const char *
+ctf_str_add_ref (ctf_file_t *fp, const char *str, uint32_t *ref)
+{
+  ctf_str_atom_t *atom;
+  if (!str)
+    return NULL;
+
+  atom = ctf_str_add_ref_internal (fp, str, TRUE, ref);
+  if (!atom)
+    return NULL;
+
+  return atom->csa_str;
+}
+
+/* Mark STR as external at strtab OFFSET.  Returns NULL on OOM or null STR.  */
+const char *
+ctf_str_add_external (ctf_file_t *fp, const char *str, uint32_t offset)
+{
+  ctf_str_atom_t *atom;
+  if (!str)
+    return NULL;
+
+  atom = ctf_str_add_ref_internal (fp, str, FALSE, 0);
+  if (!atom)
+    return NULL;
+
+  atom->csa_offset = CTF_SET_STID (offset, CTF_STRTAB_1);
+  return atom->csa_str;
 }
 
 /* A ctf_dynhash_iter_remove() callback that removes atoms later than a given
@@ -173,17 +212,6 @@ ctf_str_rollback (ctf_file_t *fp, ctf_snapshot_id_t id)
   ctf_dynhash_iter_remove (fp->ctf_str_atoms, ctf_str_rollback_atom, &id);
 }
 
-/* Like ctf_str_add(), but additionally augment the atom's refs list with the
-   passed-in ref, whether or not the string is already present.  There is no
-   attempt to deduplicate the refs list (but duplicates are harmless).  */
-const char *
-ctf_str_add_ref (ctf_file_t *fp, const char *str, uint32_t *ref)
-{
-  if (str)
-    return ctf_str_add_ref_internal (fp, str, TRUE, ref);
-  return NULL;
-}
-
 /* An adaptor around ctf_purge_atom_refs.  */
 static void
 ctf_str_purge_one_atom_refs (void *key _libctf_unused_, void *value,
@@ -238,7 +266,11 @@ ctf_str_count_strtab (void *key _libctf_unused_, void *value,
   ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
   ctf_strtab_write_state_t *s = (ctf_strtab_write_state_t *) arg;
 
-  s->strtab->cts_len += strlen (atom->csa_str) + 1;
+  /* We only factor in the length of items that have no offset:
+     other items are in the external strtab.  They still contribute to the
+     total count, though, because we still have to sort them.  */
+  if (!atom->csa_offset)
+    s->strtab->cts_len += strlen (atom->csa_str) + 1;
   s->strtab_count++;
 }
 
@@ -317,12 +349,18 @@ ctf_str_write_strtab (ctf_file_t *fp)
       return strtab;
     }
 
-  /* Update the strtab, and all refs.  */
+  /* Update all refs: also update the strtab if this is not an external strtab
+     pointer.  */
   for (i = 0; i < s.strtab_count; i++)
     {
-      strcpy (&strtab.cts_strs[cur_stroff], sorttab[i]->csa_str);
-      ctf_str_update_refs (sorttab[i], cur_stroff);
-      cur_stroff += strlen (sorttab[i]->csa_str) + 1;
+      if (sorttab[i]->csa_offset)
+ ctf_str_update_refs (sorttab[i], sorttab[i]->csa_offset);
+      else
+ {
+  ctf_str_update_refs (sorttab[i], cur_stroff);
+  strcpy (&strtab.cts_strs[cur_stroff], sorttab[i]->csa_str);
+  cur_stroff += strlen (sorttab[i]->csa_str) + 1;
+ }
     }
   free (sorttab);
 
--
2.22.0.238.g049a27acdc

Reply | Threaded
Open this post in threaded view
|

[PATCH 10/19] libctf: write CTF files to memory, and CTF archives to fds

Nick Alcock
In reply to this post by Nick Alcock
Before now, we've been able to write CTF files to gzFile descriptors or
fds, and CTF archives to named files only.

Make this a bit less irregular by allowing CTF archives to be written
to fds with the new function ctf_arc_write_fd: also allow CTF
files to be written to a new memory buffer via ctf_write_mem.

(It would be nice to complete things by adding a new function to write
CTF archives to memory, but this is too difficult to do given the short
time the linker is expected to be writing them out: we will transition
to a better format in format v4, though we will always support reading
CTF archives that are stored in .ctf sections.)

include/
        * ctf-api.h (ctf_arc_write_fd): New.
        (ctf_write_mem): Likewise.
        (ctf_gzwrite): Spacing fix.

libctf/
        * ctf-archive.c (ctf_arc_write): Split off, and reimplement in terms
        of...
        (ctf_arc_write_fd): ... this new function.
        * ctf-create.c (ctf_write_mem): New.
---
 include/ctf-api.h    |  5 ++-
 libctf/ctf-archive.c | 93 +++++++++++++++++++++++++++-----------------
 libctf/ctf-create.c  | 51 ++++++++++++++++++++++++
 3 files changed, 113 insertions(+), 36 deletions(-)

diff --git a/include/ctf-api.h b/include/ctf-api.h
index ba3cc34cad..c122f4c13e 100644
--- a/include/ctf-api.h
+++ b/include/ctf-api.h
@@ -258,6 +258,8 @@ extern void ctf_file_close (ctf_file_t *);
 
 extern int ctf_arc_write (const char *, ctf_file_t **, size_t,
   const char **, size_t);
+extern int ctf_arc_write_fd (int, ctf_file_t **, size_t, const char **,
+     size_t);
 
 extern const char *ctf_cuname (ctf_file_t *);
 extern void ctf_cuname_set (ctf_file_t *, const char *);
@@ -376,8 +378,9 @@ extern ctf_snapshot_id_t ctf_snapshot (ctf_file_t *);
 extern int ctf_rollback (ctf_file_t *, ctf_snapshot_id_t);
 extern int ctf_discard (ctf_file_t *);
 extern int ctf_write (ctf_file_t *, int);
-extern int ctf_gzwrite (ctf_file_t * fp, gzFile fd);
+extern int ctf_gzwrite (ctf_file_t *fp, gzFile fd);
 extern int ctf_compress_write (ctf_file_t * fp, int fd);
+extern unsigned char *ctf_write_mem (ctf_file_t *, size_t *, size_t threshold);
 
 extern void ctf_setdebug (int debug);
 extern int ctf_getdebug (void);
diff --git a/libctf/ctf-archive.c b/libctf/ctf-archive.c
index a13bac8cd6..6a161f94b2 100644
--- a/libctf/ctf-archive.c
+++ b/libctf/ctf-archive.c
@@ -47,17 +47,17 @@ static int arc_mmap_unmap (void *header, size_t headersz, const char **errmsg);
 /* bsearch() internal state.  */
 static __thread char *search_nametbl;
 
-/* Write out a CTF archive.  The entries in CTF_FILES are referenced by name:
-   the names are passed in the names array, which must have CTF_FILES entries.
+/* Write out a CTF archive to the start of the file referenced by the passed-in
+   fd.  The entries in CTF_FILES are referenced by name: the names are passed in
+   the names array, which must have CTF_FILES entries.
 
    Returns 0 on success, or an errno, or an ECTF_* value.  */
 int
-ctf_arc_write (const char *file, ctf_file_t ** ctf_files, size_t ctf_file_cnt,
-       const char **names, size_t threshold)
+ctf_arc_write_fd (int fd, ctf_file_t **ctf_files, size_t ctf_file_cnt,
+  const char **names, size_t threshold)
 {
   const char *errmsg;
   struct ctf_archive *archdr;
-  int fd;
   size_t i;
   char dummy = 0;
   size_t headersz;
@@ -68,15 +68,9 @@ ctf_arc_write (const char *file, ctf_file_t ** ctf_files, size_t ctf_file_cnt,
   off_t nameoffs;
   struct ctf_archive_modent *modent;
 
-  ctf_dprintf ("Writing archive %s with %lu files\n", file,
+  ctf_dprintf ("Writing CTF archive with %lu files\n",
        (unsigned long) ctf_file_cnt);
 
-  if ((fd = open (file, O_RDWR | O_CREAT | O_TRUNC | O_CLOEXEC, 0666)) < 0)
-    {
-      errmsg = "ctf_arc_write(): cannot create %s: %s\n";
-      goto err;
-    }
-
   /* Figure out the size of the mmap()ed header, including the
      ctf_archive_modent array.  We assume that all of this needs no
      padding: a likely assumption, given that it's all made up of
@@ -91,20 +85,20 @@ ctf_arc_write (const char *file, ctf_file_t ** ctf_files, size_t ctf_file_cnt,
   ctf_startoffs = headersz;
   if (lseek (fd, ctf_startoffs - 1, SEEK_SET) < 0)
     {
-      errmsg = "ctf_arc_write(): cannot extend file while writing %s: %s\n";
-      goto err_close;
+      errmsg = "ctf_arc_write(): cannot extend file while writing: %s\n";
+      goto err;
     }
 
   if (write (fd, &dummy, 1) < 0)
     {
-      errmsg = "ctf_arc_write(): cannot extend file while writing %s: %s\n";
-      goto err_close;
+      errmsg = "ctf_arc_write(): cannot extend file while writing: %s\n";
+      goto err;
     }
 
   if ((archdr = arc_mmap_header (fd, headersz)) == NULL)
     {
-      errmsg = "ctf_arc_write(): Cannot mmap() %s: %s\n";
-      goto err_close;
+      errmsg = "ctf_arc_write(): Cannot mmap(): %s\n";
+      goto err;
     }
 
   /* Fill in everything we can, which is everything other than the name
@@ -137,7 +131,7 @@ ctf_arc_write (const char *file, ctf_file_t ** ctf_files, size_t ctf_file_cnt,
   nametbl = malloc (namesz);
   if (nametbl == NULL)
     {
-      errmsg = "Error writing named CTF to %s: %s\n";
+      errmsg = "Error writing named CTF to archive: %s\n";
       goto err_unmap;
     }
 
@@ -154,12 +148,12 @@ ctf_arc_write (const char *file, ctf_file_t ** ctf_files, size_t ctf_file_cnt,
       if ((off < 0) && (off > -ECTF_BASE))
  {
   errmsg = "ctf_arc_write(): Cannot determine file "
-    "position while writing %s: %s";
+    "position while writing to archive: %s";
   goto err_free;
  }
       if (off < 0)
  {
-  errmsg = "ctf_arc_write(): Cannot write CTF file to %s: %s\n";
+  errmsg = "ctf_arc_write(): Cannot write CTF file to archive: %s\n";
   errno = off * -1;
   goto err_free;
  }
@@ -181,7 +175,7 @@ ctf_arc_write (const char *file, ctf_file_t ** ctf_files, size_t ctf_file_cnt,
   if ((nameoffs = lseek (fd, 0, SEEK_CUR)) < 0)
     {
       errmsg = "ctf_arc_write(): Cannot get current file position "
- "in %s: %s\n";
+ "in archive: %s\n";
       goto err_free;
     }
   archdr->ctfa_names = htole64 (nameoffs);
@@ -191,7 +185,7 @@ ctf_arc_write (const char *file, ctf_file_t ** ctf_files, size_t ctf_file_cnt,
       ssize_t len;
       if ((len = write (fd, np, namesz)) < 0)
  {
-  errmsg = "ctf_arc_write(): Cannot write name table in %s: %s\n";
+  errmsg = "ctf_arc_write(): Cannot write name table to archive: %s\n";
   goto err_free;
  }
       namesz -= len;
@@ -202,29 +196,58 @@ ctf_arc_write (const char *file, ctf_file_t ** ctf_files, size_t ctf_file_cnt,
   if (arc_mmap_writeout (fd, archdr, headersz, &errmsg) < 0)
     goto err_unmap;
   if (arc_mmap_unmap (archdr, headersz, &errmsg) < 0)
-    goto err_unlink;
-  if (close (fd) < 0)
-    {
-      errmsg = "ctf_arc_write(): Cannot close after writing to %s: %s\n";
-      goto err_unlink;
-    }
-
+    goto err;
   return 0;
 
 err_free:
   free (nametbl);
 err_unmap:
   arc_mmap_unmap (archdr, headersz, NULL);
-err_close:
-  close (fd);
-err_unlink:
-  unlink (file);
 err:
-  ctf_dprintf (errmsg, file, errno < ECTF_BASE ? strerror (errno) :
+  ctf_dprintf (errmsg, errno < ECTF_BASE ? strerror (errno) :
        ctf_errmsg (errno));
   return errno;
 }
 
+/* Write out a CTF archive.  The entries in CTF_FILES are referenced by name:
+   the names come from the names array, which must have CTF_FILE_CNT entries.
+
+   If the filename is NULL, create a temporary file and return a pointer to it.
+
+   Returns 0 on success, or an errno, or an ECTF_* value.  */
+int
+ctf_arc_write (const char *file, ctf_file_t ** ctf_files, size_t ctf_file_cnt,
+       const char **names, size_t threshold)
+{
+  int err;
+  int fd;
+
+  if ((fd = open (file, O_RDWR | O_CREAT | O_TRUNC | O_CLOEXEC, 0666)) < 0)
+    {
+      ctf_dprintf ("ctf_arc_write(): cannot create %s: %s\n", file,
+   strerror (errno));
+      return errno;
+    }
+
+  err = ctf_arc_write_fd (fd, ctf_files, ctf_file_cnt, names, threshold);
+  if (err)
+    goto err;
+
+  if ((err = close (fd)) < 0)
+    {
+      ctf_dprintf ("ctf_arc_write(): Cannot close after writing to archive: "
+   "%s\n", strerror (errno));
+      goto err;
+    }
+
+ err:
+  close (fd);
+  if (err < 0)
+    unlink (file);
+
+  return err;
+}
+
 /* Write one CTF file out.  Return the file position of the written file (or
    rather, of the file-size uint64_t that precedes it): negative return is a
    negative errno or ctf_errno value.  On error, the file position may no longer
diff --git a/libctf/ctf-create.c b/libctf/ctf-create.c
index 4ea288e451..5b6cc8b597 100644
--- a/libctf/ctf-create.c
+++ b/libctf/ctf-create.c
@@ -2012,6 +2012,57 @@ ret:
   return err;
 }
 
+/* Optionally compress the specified CTF data stream and return it as a new
+   dynamically-allocated string.  */
+unsigned char *
+ctf_write_mem (ctf_file_t *fp, size_t *size, size_t threshold)
+{
+  unsigned char *buf;
+  unsigned char *bp;
+  ctf_header_t *hp;
+  ssize_t header_len = sizeof (ctf_header_t);
+  ssize_t compress_len;
+  size_t max_compress_len = compressBound (fp->ctf_size);
+  int rc;
+
+  if (fp->ctf_size < threshold)
+    max_compress_len = fp->ctf_size;
+  if ((buf = malloc (max_compress_len
+     + sizeof (struct ctf_header))) == NULL)
+    {
+      ctf_set_errno (fp, ENOMEM);
+      return NULL;
+    }
+
+  hp = (ctf_header_t *) buf;
+  memcpy (hp, fp->ctf_header, header_len);
+  bp = buf + sizeof (struct ctf_header);
+  *size = sizeof (struct ctf_header);
+
+  compress_len = max_compress_len;
+
+  if (fp->ctf_size < threshold)
+    {
+      hp->cth_flags &= ~CTF_F_COMPRESS;
+      memcpy (bp, fp->ctf_buf, fp->ctf_size);
+      *size += fp->ctf_size;
+    }
+  else
+    {
+      hp->cth_flags |= CTF_F_COMPRESS;
+      if ((rc = compress (bp, (uLongf *) &compress_len,
+  fp->ctf_buf, fp->ctf_size)) != Z_OK)
+ {
+  ctf_dprintf ("zlib deflate err: %s\n", zError (rc));
+  ctf_set_errno (fp, ECTF_COMPRESS);
+  ctf_free (buf);
+  return NULL;
+ }
+      *size += compress_len;
+    }
+  return buf;
+}
+
 /* Write the uncompressed CTF data stream to the specified file descriptor.  */
 int
 ctf_write (ctf_file_t *fp, int fd)
--
2.22.0.238.g049a27acdc

Reply | Threaded
Open this post in threaded view
|

[PATCH 11/19] libctf: fix double free on ctf_compress_write error path

Nick Alcock
In reply to this post by Nick Alcock
We were freeing the compressed-data buffer twice if compression
failed: once explicitly, and once more on the common exit path.

libctf/
        * ctf-create.c (ctf_compress_write): Fix double free.
---
 libctf/ctf-create.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libctf/ctf-create.c b/libctf/ctf-create.c
index 5b6cc8b597..2e05649d4a 100644
--- a/libctf/ctf-create.c
+++ b/libctf/ctf-create.c
@@ -1980,7 +1980,6 @@ ctf_compress_write (ctf_file_t *fp, int fd)
     {
       ctf_dprintf ("zlib deflate err: %s\n", zError (rc));
       err = ctf_set_errno (fp, ECTF_COMPRESS);
-      ctf_free (buf);
       goto ret;
     }
 
--
2.22.0.238.g049a27acdc

Reply | Threaded
Open this post in threaded view
|

[PATCH 12/19] libctf: dump: support non-root type dumping

Nick Alcock
In reply to this post by Nick Alcock
Use the recently-added ctf_type_iter_all function to iterate over
non-root types, too, indicating them via {....} surrounding the type
description in the dump.

libctf/
        * ctf-dump.c (ctf_dump): Use ctf_type_iter_all to dump types, not
        ctf_type_iter.
        (ctf_dump_type): Pass down the flag from ctf_type_iter_all.
        (ctf_dump_format_type): Add non-root-type { } notation.
        Add root flag to prototype.
        (ctf_dump_label): Adjust accordingly.
        (ctf_dump_objts): Likewise.
        (ctf_dump_var): Likewise.
---
 libctf/ctf-dump.c | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/libctf/ctf-dump.c b/libctf/ctf-dump.c
index 8c49a04afd..46613bd2c5 100644
--- a/libctf/ctf-dump.c
+++ b/libctf/ctf-dump.c
@@ -95,7 +95,7 @@ ctf_is_slice (ctf_file_t *fp, ctf_id_t id, ctf_encoding_t *enc)
    type's references.  */
 
 static char *
-ctf_dump_format_type (ctf_file_t *fp, ctf_id_t id)
+ctf_dump_format_type (ctf_file_t *fp, ctf_id_t id, int flag)
 {
   ctf_id_t new_id;
   char *str = NULL, *bit = NULL, *buf = NULL;
@@ -104,8 +104,16 @@ ctf_dump_format_type (ctf_file_t *fp, ctf_id_t id)
   do
     {
       ctf_encoding_t enc;
+      const char *nonroot_leader = "";
+      const char *nonroot_trailer = "";
 
       id = new_id;
+      if (flag == CTF_ADD_NONROOT)
+ {
+  nonroot_leader = "{";
+  nonroot_trailer = "}";
+ }
+
       buf = ctf_type_aname (fp, id);
       if (!buf)
  goto oom;
@@ -115,15 +123,17 @@ ctf_dump_format_type (ctf_file_t *fp, ctf_id_t id)
       if (ctf_is_slice (fp, id, &enc))
  {
   ctf_type_encoding (fp, id, &enc);
-  if (asprintf (&bit, " %lx: [slice 0x%x:0x%x]",
- id, enc.cte_offset, enc.cte_bits) < 0)
+  if (asprintf (&bit, " %s%lx: [slice 0x%x:0x%x]%s",
+ nonroot_leader, id, enc.cte_offset, enc.cte_bits,
+ nonroot_trailer) < 0)
     goto oom;
  }
       else
  {
-  if (asprintf (&bit, " %lx: %s (size 0x%lx)", id, buf[0] == '\0' ?
- "(nameless)" : buf,
- (unsigned long) ctf_type_size (fp, id)) < 0)
+  if (asprintf (&bit, " %s%lx: %s (size 0x%lx)%s", nonroot_leader,
+ id, buf[0] == '\0' ? "(nameless)" : buf,
+ (unsigned long) ctf_type_size (fp, id),
+ nonroot_trailer) < 0)
     goto oom;
  }
       free (buf);
@@ -292,7 +302,8 @@ ctf_dump_label (const char *name, const ctf_lblinfo_t *info,
   if (asprintf (&str, "%s -> ", name) < 0)
     return (ctf_set_errno (state->cds_fp, ENOMEM));
 
-  if ((typestr = ctf_dump_format_type (state->cds_fp, info->ctb_type)) == NULL)
+  if ((typestr = ctf_dump_format_type (state->cds_fp, info->ctb_type,
+       CTF_ADD_ROOT)) == NULL)
     {
       free (str);
       return -1; /* errno is set for us.  */
@@ -348,7 +359,8 @@ ctf_dump_objts (ctf_file_t *fp, ctf_dump_state_t *state)
  }
 
       /* Variable type.  */
-      if ((typestr = ctf_dump_format_type (state->cds_fp, type)) == NULL)
+      if ((typestr = ctf_dump_format_type (state->cds_fp, type,
+   CTF_ADD_ROOT)) == NULL)
  {
   free (str);
   return -1; /* errno is set for us.  */
@@ -464,7 +476,8 @@ ctf_dump_var (const char *name, ctf_id_t type, void *arg)
   if (asprintf (&str, "%s -> ", name) < 0)
     return (ctf_set_errno (state->cds_fp, ENOMEM));
 
-  if ((typestr = ctf_dump_format_type (state->cds_fp, type)) == NULL)
+  if ((typestr = ctf_dump_format_type (state->cds_fp, type,
+       CTF_ADD_ROOT)) == NULL)
     {
       free (str);
       return -1; /* errno is set for us.  */
@@ -529,14 +542,14 @@ ctf_dump_member (const char *name, ctf_id_t id, unsigned long offset,
 /* Dump a single type into the cds_items.  */
 
 static int
-ctf_dump_type (ctf_id_t id, void *arg)
+ctf_dump_type (ctf_id_t id, int flag, void *arg)
 {
   char *str;
   ctf_dump_state_t *state = arg;
   ctf_dump_membstate_t membstate = { &str, state->cds_fp };
   size_t len;
 
-  if ((str = ctf_dump_format_type (state->cds_fp, id)) == NULL)
+  if ((str = ctf_dump_format_type (state->cds_fp, id, flag)) == NULL)
     goto err;
 
   str = ctf_str_append (str, "\n");
@@ -641,7 +654,7 @@ ctf_dump (ctf_file_t *fp, ctf_dump_state_t **statep, ctf_sect_names_t sect,
     goto end; /* errno is set for us.  */
   break;
  case CTF_SECT_TYPE:
-  if (ctf_type_iter (fp, ctf_dump_type, state) < 0)
+  if (ctf_type_iter_all (fp, ctf_dump_type, state) < 0)
     goto end; /* errno is set for us.  */
   break;
  case CTF_SECT_STR:
--
2.22.0.238.g049a27acdc

Reply | Threaded
Open this post in threaded view
|

[PATCH 13/19] libctf: dump: check the right error values when dumping functions

Nick Alcock
In reply to this post by Nick Alcock
We weren't correctly detecting when there were no functions to dump in
the function info table, because we were checking for ECTF_NOTYPEDAT,
which means there are no *data objects* to dump.

Adjust accordingly.

libctf/
        * ctf-dump.c (ctf_dump_funcs): Check the right error value.
---
 libctf/ctf-dump.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libctf/ctf-dump.c b/libctf/ctf-dump.c
index 46613bd2c5..3d66411a70 100644
--- a/libctf/ctf-dump.c
+++ b/libctf/ctf-dump.c
@@ -401,7 +401,8 @@ ctf_dump_funcs (ctf_file_t *fp, ctf_dump_state_t *state)
   case ECTF_NOSYMTAB:
     return -1;
   case ECTF_NOTDATA:
-  case ECTF_NOTYPEDAT:
+  case ECTF_NOTFUNC:
+  case ECTF_NOFUNCDAT:
     continue;
   }
       if ((args = calloc (fi.ctc_argc, sizeof (ctf_id_t))) == NULL)
--
2.22.0.238.g049a27acdc

Reply | Threaded
Open this post in threaded view
|

[PATCH 14/19] libctf: add the ctf_link machinery

Nick Alcock
In reply to this post by Nick Alcock
This is the start of work on the core of the linking mechanism for CTF
sections.  This commit handles the type and string sections.

The linker calls these functions in sequence:

ctf_link_add_ctf: to add each CTF section in the input in turn to a
  newly-created ctf_file_t (which will appear in the output, and which
  itself will become the shared parent that contains types that all
  TUs have in common (in all link modes) and all types that do not
  have conflicting definitions between types (by default)).  Input files
  that are themselves products of ld -r are supported, though this is
  not heavily tested yet.

ctf_link: called once all input files are added to merge the types in
  all the input containers into the output container, eliminating
  duplicates.

ctf_link_add_strtab: called once the ELF string table is finalized and
  all its offsets are known, this calls a callback provided by the
  linker which returns the string content and offset of every string in
  the ELF strtab in turn: all these strings which appear in the input
  CTF strtab are eliminated from it in favour of the ELF strtab:
  equally, any strings that only appear in the input strtab will
  reappear in the internal CTF strtab of the output.

ctf_link_shuffle_syms (not yet implemented): called once the ELF symtab
  is finalized, this calls a callback provided by the linker which
  returns information on every symbol in turn as a ctf_link_sym_t.  This
  is then used to shuffle the function info and data object sections in
  the CTF section into symbol table order, eliminating the index
  sections which map those sections to symbol names before that point.
  Currently just returns ECTF_NOTYET.

ctf_link_write: Returns a buffer containing either a serialized
  ctf_file_t (if there are no types with conflicting definitions in the
  object files in the link) or a ctf_archive_t containing a large
  ctf_file_t (the common types) and a bunch of small ones named after
  individual CUs in which conflicting types are found (containing the
  conflicting types, and all types that reference them).  A threshold
  size above which compression takes place is passed as one parameter.
  (Currently, only gzip compression is supported, but I hope to add lzma
  as well.)

Lifetime rules for this are simple: don't close the input CTF files
until you've called ctf_link for the last time.  We do not assume
that symbols or strings passed in by the callback outlast the
call to ctf_link_add_strtab or ctf_link_shuffle_syms.

Right now, the duplicate elimination mechanism is the one already
present as part of the ctf_add_type function, and is not particularly
good: it misses numerous actual duplicates, and the conflicting-types
detection hardly ever reports that types conflict, even when they do
(one of them just tends to get silently dropped): it is also very slow.
This will all be fixed in the next few weeks, but the fix hardly touches
any of this code, and the linker does work without it, just not as
well as it otherwise might.  (And when no CTF section is present,
there is no effect on performance, of course.  So only people using
a trunk GCC with not-yet-committed patches will even notice.  By the
time it gets upstream, things should be better.)

include/
        * ctf-api.h (struct ctf_link_sym): New, a symbol in flight to the
        libctf linking machinery.
        (CTF_LINK_SHARE_UNCONFLICTED): New.
        (CTF_LINK_SHARE_DUPLICATED): New.
        (ECTF_LINKADDEDLATE): New, replacing ECTF_UNUSED.
        (ECTF_NOTYET): New, a 'not yet implemented' message.
        (ctf_link_add_ctf): New, add an input file's CTF to the link.
        (ctf_link): New, merge the type and string sections.
        (ctf_link_strtab_string_f): New, callback for feeding strtab info.
        (ctf_link_iter_symbol_f): New, callback for feeding symtab info.
        (ctf_link_add_strtab): New, tell the CTF linker about the ELF
        strtab's strings.
        (ctf_link_shuffle_syms): New, ask the CTF linker to shuffle its
        symbols into symtab order.
        (ctf_link_write): New, ask the CTF linker to write the CTF out.

libctf/
        * ctf-link.c: New file, linking of the string and type sections.
        * Makefile.am (libctf_a_SOURCES): Add it.
        * Makefile.in: Regenerate.

        * ctf-impl.h (ctf_file_t): New fields ctf_link_inputs,
        ctf_link_outputs.
        * ctf-create.c (ctf_update): Update accordingly.
        * ctf-open.c (ctf_file_close): Likewise.
        * ctf-error.c (_ctf_errlist): Updated with new errors.
---
 include/ctf-api.h   |  39 +++-
 libctf/Makefile.am  |   4 +-
 libctf/Makefile.in  |  21 +-
 libctf/ctf-create.c |   4 +
 libctf/ctf-error.c  |   5 +-
 libctf/ctf-impl.h   |   2 +
 libctf/ctf-link.c   | 507 ++++++++++++++++++++++++++++++++++++++++++++
 libctf/ctf-open.c   |   3 +
 8 files changed, 569 insertions(+), 16 deletions(-)
 create mode 100644 libctf/ctf-link.c

diff --git a/include/ctf-api.h b/include/ctf-api.h
index c122f4c13e..ac20551917 100644
--- a/include/ctf-api.h
+++ b/include/ctf-api.h
@@ -65,6 +65,28 @@ typedef struct ctf_sect
   size_t cts_entsize;  /* Size of each section entry (symtab only).  */
 } ctf_sect_t;
 
+/* A minimal symbol extracted from a linker's internal symbol table
+   representation.  */
+
+typedef struct ctf_link_sym
+{
+  /* The st_name will not be accessed outside the call to
+     ctf_link_shuffle_syms().  */
+
+  const char *st_name;
+  uint32_t st_shndx;
+  uint32_t st_type;
+  uint32_t st_value;
+} ctf_link_sym_t;
+
+/* Indication of how to share types when linking.  */
+
+/* Share all types that are not in conflict.  The default.  */
+#define CTF_LINK_SHARE_UNCONFLICTED 0x0
+
+/* Share only types that are used by multiple inputs.  Not implemented yet.  */
+#define CTF_LINK_SHARE_DUPLICATED 0x1
+
 /* Symbolic names for CTF sections.  */
 
 typedef enum ctf_sect_names
@@ -145,7 +167,7 @@ enum
    ECTF_NOSYMTAB, /* Symbol table data is not available.  */
    ECTF_NOPARENT, /* Parent CTF container is not available.  */
    ECTF_DMODEL, /* Data model mismatch.  */
-   ECTF_UNUSED, /* Unused error.  */
+   ECTF_LINKADDEDLATE, /* File added to link too late.  */
    ECTF_ZALLOC, /* Failed to allocate (de)compression buffer.  */
    ECTF_DECOMPRESS, /* Failed to decompress CTF data.  */
    ECTF_STRTAB, /* String table for this string is missing.  */
@@ -180,7 +202,8 @@ enum
    ECTF_ARNNAME, /* Name not found in CTF archive.  */
    ECTF_SLICEOVERFLOW, /* Overflow of type bitness or offset in slice.  */
    ECTF_DUMPSECTUNKNOWN, /* Unknown section number in dump.  */
-   ECTF_DUMPSECTCHANGED /* Section changed in middle of dump.  */
+   ECTF_DUMPSECTCHANGED, /* Section changed in middle of dump.  */
+   ECTF_NOTYET /* Feature not yet implemented.  */
   };
 
 /* The CTF data model is inferred to be the caller's data model or the data
@@ -382,6 +405,18 @@ extern int ctf_gzwrite (ctf_file_t *fp, gzFile fd);
 extern int ctf_compress_write (ctf_file_t * fp, int fd);
 extern unsigned char *ctf_write_mem (ctf_file_t *, size_t *, size_t threshold);
 
+extern int ctf_link_add_ctf (ctf_file_t *, ctf_archive_t *, const char *);
+extern int ctf_link (ctf_file_t *, int share_mode);
+typedef const char *ctf_link_strtab_string_f (uint32_t *offset, void *arg);
+extern int ctf_link_add_strtab (ctf_file_t *, ctf_link_strtab_string_f *,
+ void *);
+typedef ctf_link_sym_t *ctf_link_iter_symbol_f (ctf_link_sym_t *dest,
+ void *arg);
+extern int ctf_link_shuffle_syms (ctf_file_t *, ctf_link_iter_symbol_f *,
+  void *);
+extern unsigned char *ctf_link_write (ctf_file_t *, size_t *size,
+      size_t threshold);
+
 extern void ctf_setdebug (int debug);
 extern int ctf_getdebug (void);
 
diff --git a/libctf/Makefile.am b/libctf/Makefile.am
index 43fc78a412..a0a27b46c3 100644
--- a/libctf/Makefile.am
+++ b/libctf/Makefile.am
@@ -33,8 +33,8 @@ AM_CFLAGS = -std=gnu99 @ac_libctf_warn_cflags@ @warn@ @c_warn@ @WARN_PEDANTIC@ @
 noinst_LIBRARIES = libctf.a
 
 libctf_a_SOURCES = ctf-archive.c ctf-dump.c ctf-create.c ctf-decl.c ctf-error.c \
-   ctf-hash.c ctf-labels.c ctf-lookup.c ctf-open.c ctf-open-bfd.c \
-   ctf-string.c ctf-subr.c ctf-types.c ctf-util.c
+   ctf-hash.c ctf-labels.c ctf-link.c ctf-lookup.c ctf-open.c \
+   ctf-open-bfd.c ctf-string.c ctf-subr.c ctf-types.c ctf-util.c
 if NEED_CTF_QSORT_R
 libctf_a_SOURCES += ctf-qsort_r.c
 endif
diff --git a/libctf/Makefile.in b/libctf/Makefile.in
index c898eb4941..1d2efb9e75 100644
--- a/libctf/Makefile.in
+++ b/libctf/Makefile.in
@@ -131,16 +131,16 @@ am__v_AR_1 =
 libctf_a_AR = $(AR) $(ARFLAGS)
 libctf_a_LIBADD =
 am__libctf_a_SOURCES_DIST = ctf-archive.c ctf-dump.c ctf-create.c \
- ctf-decl.c ctf-error.c ctf-hash.c ctf-labels.c ctf-lookup.c \
- ctf-open.c ctf-open-bfd.c ctf-string.c ctf-subr.c ctf-types.c \
- ctf-util.c ctf-qsort_r.c
+ ctf-decl.c ctf-error.c ctf-hash.c ctf-labels.c ctf-link.c \
+ ctf-lookup.c ctf-open.c ctf-open-bfd.c ctf-string.c ctf-subr.c \
+ ctf-types.c ctf-util.c ctf-qsort_r.c
 @NEED_CTF_QSORT_R_TRUE@am__objects_1 = ctf-qsort_r.$(OBJEXT)
 am_libctf_a_OBJECTS = ctf-archive.$(OBJEXT) ctf-dump.$(OBJEXT) \
  ctf-create.$(OBJEXT) ctf-decl.$(OBJEXT) ctf-error.$(OBJEXT) \
- ctf-hash.$(OBJEXT) ctf-labels.$(OBJEXT) ctf-lookup.$(OBJEXT) \
- ctf-open.$(OBJEXT) ctf-open-bfd.$(OBJEXT) ctf-string.$(OBJEXT) \
- ctf-subr.$(OBJEXT) ctf-types.$(OBJEXT) ctf-util.$(OBJEXT) \
- $(am__objects_1)
+ ctf-hash.$(OBJEXT) ctf-labels.$(OBJEXT) ctf-link.$(OBJEXT) \
+ ctf-lookup.$(OBJEXT) ctf-open.$(OBJEXT) ctf-open-bfd.$(OBJEXT) \
+ ctf-string.$(OBJEXT) ctf-subr.$(OBJEXT) ctf-types.$(OBJEXT) \
+ ctf-util.$(OBJEXT) $(am__objects_1)
 libctf_a_OBJECTS = $(am_libctf_a_OBJECTS)
 AM_V_P = $(am__v_P_@AM_V@)
 am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -331,9 +331,9 @@ AM_CPPFLAGS = -D_GNU_SOURCE -I$(top_srcdir) -I$(top_srcdir)/../include -I$(top_s
 AM_CFLAGS = -std=gnu99 @ac_libctf_warn_cflags@ @warn@ @c_warn@ @WARN_PEDANTIC@ @WERROR@ $(ZLIBINC)
 noinst_LIBRARIES = libctf.a
 libctf_a_SOURCES = ctf-archive.c ctf-dump.c ctf-create.c ctf-decl.c \
- ctf-error.c ctf-hash.c ctf-labels.c ctf-lookup.c ctf-open.c \
- ctf-open-bfd.c ctf-string.c ctf-subr.c ctf-types.c ctf-util.c \
- $(am__append_1)
+ ctf-error.c ctf-hash.c ctf-labels.c ctf-link.c ctf-lookup.c \
+ ctf-open.c ctf-open-bfd.c ctf-string.c ctf-subr.c ctf-types.c \
+ ctf-util.c $(am__append_1)
 all: config.h
  $(MAKE) $(AM_MAKEFLAGS) all-am
 
@@ -409,6 +409,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ctf-error.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ctf-hash.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ctf-labels.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ctf-link.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ctf-lookup.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ctf-open-bfd.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ctf-open.Po@am__quote@
diff --git a/libctf/ctf-create.c b/libctf/ctf-create.c
index 2e05649d4a..e3d5b22f17 100644
--- a/libctf/ctf-create.c
+++ b/libctf/ctf-create.c
@@ -455,6 +455,8 @@ ctf_update (ctf_file_t *fp)
   nfp->ctf_dtoldid = fp->ctf_dtnextid - 1;
   nfp->ctf_snapshots = fp->ctf_snapshots + 1;
   nfp->ctf_specific = fp->ctf_specific;
+  nfp->ctf_link_inputs = fp->ctf_link_inputs;
+  nfp->ctf_link_outputs = fp->ctf_link_outputs;
 
   nfp->ctf_snapshot_lu = fp->ctf_snapshots;
 
@@ -464,6 +466,8 @@ ctf_update (ctf_file_t *fp)
   nfp->ctf_str_atoms = fp->ctf_str_atoms;
   fp->ctf_str_atoms = NULL;
   memset (&fp->ctf_dtdefs, 0, sizeof (ctf_list_t));
+  fp->ctf_link_inputs = NULL;
+  fp->ctf_link_outputs = NULL;
 
   fp->ctf_dvhash = NULL;
   memset (&fp->ctf_dvdefs, 0, sizeof (ctf_list_t));
diff --git a/libctf/ctf-error.c b/libctf/ctf-error.c
index af3a6c7fd2..dfbb5dbf38 100644
--- a/libctf/ctf-error.c
+++ b/libctf/ctf-error.c
@@ -33,7 +33,7 @@ static const char *const _ctf_errlist[] = {
   "Symbol table information is not available",     /* ECTF_NOSYMTAB */
   "Type information is in parent and unavailable",   /* ECTF_NOPARENT */
   "Cannot import types with different data model",   /* ECTF_DMODEL */
-  "Unused error",     /* ECTF_UNUSED */
+  "File added to link too late",     /* ECTF_LINKADDEDLATE */
   "Failed to allocate (de)compression buffer",     /* ECTF_ZALLOC */
   "Failed to decompress CTF data",     /* ECTF_DECOMPRESS */
   "External string table is not available",     /* ECTF_STRTAB */
@@ -68,7 +68,8 @@ static const char *const _ctf_errlist[] = {
   "Name not found in CTF archive",     /* ECTF_ARNNAME */
   "Overflow of type bitness or offset in slice",     /* ECTF_SLICEOVERFLOW */
   "Unknown section number in dump",     /* ECTF_DUMPSECTUNKNOWN */
-  "Section changed in middle of dump"     /* ECTF_DUMPSECTCHANGED */
+  "Section changed in middle of dump",     /* ECTF_DUMPSECTCHANGED */
+  "Feature not yet implemented"     /* ECTF_NOTYET */
 };
 
 static const int _ctf_nerr = sizeof (_ctf_errlist) / sizeof (_ctf_errlist[0]);
diff --git a/libctf/ctf-impl.h b/libctf/ctf-impl.h
index 1c243d758c..088e31c851 100644
--- a/libctf/ctf-impl.h
+++ b/libctf/ctf-impl.h
@@ -266,6 +266,8 @@ struct ctf_file
   unsigned long ctf_snapshots;  /* ctf_snapshot() plus ctf_update() count.  */
   unsigned long ctf_snapshot_lu;  /* ctf_snapshot() call count at last update.  */
   ctf_archive_t *ctf_archive;  /* Archive this ctf_file_t came from.  */
+  ctf_dynhash_t *ctf_link_inputs; /* Inputs to this link.  */
+  ctf_dynhash_t *ctf_link_outputs; /* Additional outputs from this link.  */
   char *ctf_tmp_typeslice;  /* Storage for slicing up type names.  */
   size_t ctf_tmp_typeslicelen;  /* Size of the typeslice.  */
   void *ctf_specific;  /* Data for ctf_get/setspecific().  */
diff --git a/libctf/ctf-link.c b/libctf/ctf-link.c
new file mode 100644
index 0000000000..8e0f6389a6
--- /dev/null
+++ b/libctf/ctf-link.c
@@ -0,0 +1,507 @@
+/* CTF linking.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+
+   This file is part of libctf.
+
+   libctf is free software; you can redistribute it and/or modify it under
+   the terms of the GNU General Public License as published by the Free
+   Software Foundation; either version 3, or (at your option) any later
+   version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+   See the GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <ctf-impl.h>
+#include <string.h>
+
+/* Type tracking machinery.  */
+/* Linker machinery.
+
+   CTF linking consists of adding CTF archives full of content to be merged into
+   this one to the current file (which must be writable) by calling
+   ctf_link_add_ctf().  Once this is done, a call to ctf_link() will merge the
+   type tables together, generating new CTF files as needed, with this one as a
+   parent, to contain types from the inputs which conflict.
+   ctf_link_add_strtab() takes a callback which provides string/offset pairs to
+   be added to the external symbol table and deduplicated from all CTF string
+   tables in the output link; ctf_link_shuffle_syms() takes a callback which
+   provides symtab entries in ascending order, and shuffles the function and
+   data sections to match; and ctf_link_write() emits a CTF file (if there are
+   no conflicts requiring per-compilation-unit sub-CTF files) or CTF archives
+   (otherwise) and returns it, suitable for addition in the .ctf section of the
+   output.  */
+
+/* Add a file to a link.  */
+
+static void ctf_arc_close_thunk (void *arc)
+{
+  ctf_arc_close ((ctf_archive_t *) arc);
+}
+
+static void ctf_file_close_thunk (void *file)
+{
+  ctf_file_close ((ctf_file_t *) file);
+}
+
+int
+ctf_link_add_ctf (ctf_file_t *fp, ctf_archive_t *ctf, const char *name)
+{
+  char *dupname = NULL;
+
+  if (fp->ctf_link_outputs)
+    return (ctf_set_errno (fp, ECTF_LINKADDEDLATE));
+  if (fp->ctf_link_inputs == NULL)
+    fp->ctf_link_inputs = ctf_dynhash_create (ctf_hash_string,
+      ctf_hash_eq_string, free,
+      ctf_arc_close_thunk);
+
+  if (fp->ctf_link_inputs == NULL)
+    goto oom;
+
+  if ((dupname = strdup (name)) == NULL)
+    goto oom;
+
+  if (ctf_dynhash_insert (fp->ctf_link_inputs, dupname, ctf) < 0)
+    goto oom;
+
+  return 0;
+ oom:
+  free (fp->ctf_link_inputs);
+  fp->ctf_link_inputs = NULL;
+  free (dupname);
+  return (ctf_set_errno (fp, ENOMEM));
+}
+
+typedef struct ctf_link_in_member_cb_arg
+{
+  ctf_file_t *out_fp;
+  const char *file_name;
+  ctf_file_t *in_fp;
+  ctf_file_t *main_input_fp;
+  char *cu_name;
+  char *arcname;
+  int done_main_member;
+  int share_mode;
+  int in_input_cu_file;
+  int err;
+} ctf_link_in_member_cb_arg_t;
+
+
+/* Link one type into the link.  We rely on ctf_add_type() to detect
+   duplicates.  This is not terribly reliable yet (unnamed types will be
+   mindlessly duplicated), but will improve shortly.  */
+
+static int
+ctf_link_one_type (ctf_id_t type, int isroot _libctf_unused_, void *arg_)
+{
+  ctf_link_in_member_cb_arg_t *arg = (ctf_link_in_member_cb_arg_t *) arg_;
+  ctf_file_t *per_cu_out_fp;
+  int err;
+
+  if (arg->share_mode != CTF_LINK_SHARE_UNCONFLICTED)
+    {
+      ctf_dprintf ("Share-duplicated mode not yet implemented.\n");
+      return ECTF_NOTYET;
+    }
+
+  /* Simply call ctf_add_type: if it reports a conflict and we're adding to the
+     main CTF file, add to the per-CU archive member instead, creating it if
+     necessary.  If we got this type from a per-CU archive member, add it
+     straight back to the corresponding member in the output.  */
+
+  if (!arg->in_input_cu_file)
+    {
+      err = ctf_add_type (arg->out_fp, arg->in_fp, type);
+
+      if (err > -1)
+ return 0;
+
+      if (err != ECTF_CONFLICT)
+ {
+  ctf_dprintf ("Cannot link type %lx from archive member %s, input file %s "
+       "into output link: %s\n", type, arg->arcname, arg->file_name,
+       ctf_errmsg (ctf_errno (arg->out_fp)));
+  return err;
+ }
+    }
+
+  if ((per_cu_out_fp = ctf_dynhash_lookup (arg->out_fp->ctf_link_outputs,
+   arg->arcname)) == NULL)
+    {
+      int err;
+
+      if ((per_cu_out_fp = ctf_create (&err)) == NULL)
+ {
+  ctf_dprintf ("Cannot create per-CU CTF archive for member %s: %s\n",
+       arg->arcname, ctf_errmsg (err));
+  return err;
+ }
+
+      if (ctf_dynhash_insert (arg->out_fp->ctf_link_outputs, arg->arcname,
+      per_cu_out_fp) < 0)
+  return ENOMEM;
+
+      ctf_import (per_cu_out_fp, arg->out_fp);
+      ctf_cuname_set (per_cu_out_fp, arg->cu_name);
+    }
+
+  err = ctf_add_type (per_cu_out_fp, arg->in_fp, type);
+
+  if (err > -1)
+    return 0;
+
+  ctf_dprintf ("Cannot link type %lx from CTF archive member %s, input file %s "
+       "into output per-CU CTF archive member %s: %s: skipped\n", type,
+       arg->arcname, arg->file_name, arg->arcname,
+       ctf_errmsg (ctf_errno (arg->out_fp)));
+  return err; /* Should be impossible: abort link.  */
+}
+
+/* Link one variable in.  */
+
+/* Merge every type and variable in this archive member into the link, so we can
+   relink things that have already had ld run on them.  We use the archive
+   member name, sans any leading '.ctf.', as the CU name for ambiguous types if
+   there is one and it's not the default: otherwise, we use the name of the
+   input file.  */
+static int
+ctf_link_one_input_archive_member (ctf_file_t *in_fp, const char *name, void *arg_)
+{
+  ctf_link_in_member_cb_arg_t *arg = (ctf_link_in_member_cb_arg_t *) arg_;
+  int err;
+
+  if (strcmp (name, _CTF_SECTION) == 0)
+    {
+      /* This file is the default member of this archive, and has already been
+ explicitly processed.
+
+ In the default sharing mode of CTF_LINK_SHARE_UNCONFLICTED, it does no
+ harm to rescan an existing shared repo again: all the types will just
+ end up in the same place.  But in CTF_LINK_SHARE_DUPLICATED mode, this
+ causes the system to erroneously conclude that all types are duplicated
+ and should be shared, even if they are not.  */
+
+      if (arg->done_main_member)
+ return 0;
+      arg->arcname = strdup (".ctf.");
+      arg->arcname = ctf_str_append (arg->arcname, arg->file_name);
+    }
+  else
+    {
+      arg->arcname = strdup (name);
+
+      /* Get ambiguous types from our parent.  */
+      ctf_import (in_fp, arg->main_input_fp);
+      arg->in_input_cu_file = 1;
+    }
+
+  arg->cu_name = arg->arcname;
+  if (strncmp (arg->cu_name, ".ctf.", strlen (".ctf.")) == 0)
+    arg->cu_name += strlen (".ctf.");
+  arg->in_fp = in_fp;
+
+  err = ctf_type_iter_all (in_fp, ctf_link_one_type, arg);
+
+  if (err == 0)
+  arg->in_input_cu_file = 0;
+  free (arg->arcname);
+
+  return err;
+}
+
+/* Link one input file's types into the output file.  This is a
+   ctf_dynhash_iter callback: KEY is the input file name, VALUE the
+   ctf_archive_t holding its CTF, and ARG_ the ctf_link_in_member_cb_arg_t for
+   this link.  A failure to traverse the archive is recorded in ARG_'s err
+   member.  */
+static void
+ctf_link_one_input_archive (void *key, void *value, void *arg_)
+{
+  ctf_link_in_member_cb_arg_t *arg = (ctf_link_in_member_cb_arg_t *) arg_;
+  ctf_archive_t *arc = (ctf_archive_t *) value;
+  int err;
+
+  arg->file_name = (const char *) key;
+  arg->done_main_member = 0;
+
+  /* Open the default member.  An ECTF_ARNNAME failure is tolerated: any other
+     failure means this input is skipped entirely.  */
+  arg->main_input_fp = ctf_arc_open_by_name (arc, NULL, &err);
+  if (arg->main_input_fp == NULL && err != ECTF_ARNNAME)
+    {
+      ctf_dprintf ("Cannot open main archive member in input file %s in the "
+                   "link: skipping: %s.\n", arg->file_name,
+                   ctf_errmsg (err));
+      return;
+    }
+
+  /* Process the default member first, then walk the whole archive: the
+     done_main_member flag stops the default member being processed twice.
+     NOTE(review): the result of this first call is discarded, and
+     main_input_fp may be null here after ECTF_ARNNAME -- confirm both are
+     intended.  */
+  ctf_link_one_input_archive_member (arg->main_input_fp, _CTF_SECTION, arg);
+  arg->done_main_member = 1;
+
+  err = ctf_archive_iter (arc, ctf_link_one_input_archive_member, arg);
+  if (err < 0)
+    {
+      ctf_dprintf ("Cannot traverse archive in input file %s: some types "
+                   "skipped: %s.\n", arg->file_name, ctf_errmsg (err));
+      arg->err = err;
+    }
+
+  ctf_file_close (arg->main_input_fp);
+}
+
+/* Merge types and variable sections in all files added to the link
+   together.  FP is the output container and SHARE_MODE is passed through to
+   the per-member callbacks.  Returns zero on success, or if no inputs were
+   ever added: otherwise returns whatever ctf_set_errno returns after
+   recording the error on FP.  */
+int
+ctf_link (ctf_file_t *fp, int share_mode)
+{
+  ctf_link_in_member_cb_arg_t arg;
+
+  /* Nothing to do.  */
+  if (fp->ctf_link_inputs == NULL)
+    return 0;
+
+  /* Create the hash of additional outputs on first use.  */
+  if (fp->ctf_link_outputs == NULL)
+    {
+      fp->ctf_link_outputs = ctf_dynhash_create (ctf_hash_string,
+                                                 ctf_hash_eq_string, free,
+                                                 ctf_file_close_thunk);
+      if (fp->ctf_link_outputs == NULL)
+        return ctf_set_errno (fp, ENOMEM);
+    }
+
+  memset (&arg, 0, sizeof (struct ctf_link_in_member_cb_arg));
+  arg.out_fp = fp;
+  arg.share_mode = share_mode;
+
+  ctf_dynhash_iter (fp->ctf_link_inputs, ctf_link_one_input_archive, &arg);
+
+  /* Promote any sub-CU errors into the main archive.  */
+  return arg.err ? ctf_set_errno (fp, arg.err) : 0;
+}
+
+typedef struct ctf_link_out_string_cb_arg
+{
+  const char *str; /* String to intern.  */
+  uint32_t offset; /* Offset passed to ctf_str_add_external with STR.  */
+  int err; /* Set to ENOMEM if interning fails.  */
+} ctf_link_out_string_cb_arg_t;
+
+/* Intern a string in the string table of an output per-CU CTF file.  This is
+   a ctf_dynhash_iter callback: VALUE is the output ctf_file_t, ARG_ is a
+   ctf_link_out_string_cb_arg_t naming the string and its offset, and KEY is
+   unused.  The file is marked dirty first; a failure to add the string sets
+   ARG_'s err member to ENOMEM.  */
+static void
+ctf_link_intern_extern_string (void *key _libctf_unused_, void *value,
+                               void *arg_)
+{
+  ctf_link_out_string_cb_arg_t *string_arg
+    = (ctf_link_out_string_cb_arg_t *) arg_;
+  ctf_file_t *out_fp = (ctf_file_t *) value;
+
+  out_fp->ctf_flags |= LCTF_DIRTY;
+
+  if (!ctf_str_add_external (out_fp, string_arg->str, string_arg->offset))
+    string_arg->err = ENOMEM;
+}
+
+/* Repeatedly call ADD_STRING to acquire strings from the external string table,
+   adding them to the atoms table for this CU and all subsidiary CUs.  ARG is
+   handed to ADD_STRING untouched; iteration stops when ADD_STRING returns a
+   null string.
+
+   If ctf_link() is also called, it must be called first if you want the new CTF
+   files ctf_link() can create to get their strings dedupped against the ELF
+   strtab properly.
+
+   Returns zero on success, or the last error recorded while adding strings
+   (every string is still attempted after a failure).  */
+int
+ctf_link_add_strtab (ctf_file_t *fp, ctf_link_strtab_string_f *add_string,
+                     void *arg)
+{
+  int result = 0;
+  uint32_t str_offset;
+  const char *ext_str;
+
+  for (ext_str = add_string (&str_offset, arg); ext_str != NULL;
+       ext_str = add_string (&str_offset, arg))
+    {
+      ctf_link_out_string_cb_arg_t per_cu;
+
+      per_cu.str = ext_str;
+      per_cu.offset = str_offset;
+      per_cu.err = 0;
+
+      /* First the shared container itself...  */
+      fp->ctf_flags |= LCTF_DIRTY;
+      if (!ctf_str_add_external (fp, ext_str, str_offset))
+        result = ENOMEM;
+
+      /* ... then every additional output of the link.  */
+      ctf_dynhash_iter (fp->ctf_link_outputs, ctf_link_intern_extern_string,
+                        &per_cu);
+      if (per_cu.err != 0)
+        result = per_cu.err;
+    }
+
+  return result;
+}
+
+/* Not yet implemented: FP, ADD_SYM and ARG are all ignored, and zero is always
+   returned.  */
+int
+ctf_link_shuffle_syms (ctf_file_t *fp _libctf_unused_,
+       ctf_link_iter_symbol_f *add_sym _libctf_unused_,
+       void *arg _libctf_unused_)
+{
+  return 0;
+}
+
+typedef struct ctf_name_list_accum_cb_arg
+{
+  char **names; /* Hash keys accumulated so far: the keys are not copied.  */
+  ctf_file_t **files; /* Hash values accumulated so far, parallel to NAMES.  */
+  size_t i; /* Number of entries accumulated.  */
+  int err; /* Set nonzero if accumulation failed.  */
+} ctf_name_list_accum_cb_arg_t;
+
+/* Accumulate the names and a count of the names in the link output hash,
+   and run ctf_update() on them to generate them.
+
+   This is a ctf_dynhash_iter callback: KEY is the name of the output, VALUE
+   its ctf_file_t, and ARG_ the ctf_name_list_accum_cb_arg_t being built up.
+   The name is stored by reference, not copied.  On failure ARG_'s err member
+   is set and its count is left unchanged: its arrays always remain valid to
+   free.  */
+static void
+ctf_accumulate_archive_names (void *key, void *value, void *arg_)
+{
+  const char *name = (const char *) key;
+  ctf_file_t *fp = (ctf_file_t *) value;
+  char **names;
+  ctf_file_t **files;
+  ctf_name_list_accum_cb_arg_t *arg = (ctf_name_list_accum_cb_arg_t *) arg_;
+  int err;
+
+  if ((err = ctf_update (fp)) < 0)
+    {
+      arg->err = err;
+      return;
+    }
+
+  if ((names = realloc (arg->names, sizeof (char *) * (arg->i + 1))) == NULL)
+    {
+      arg->err = ENOMEM;
+      return;
+    }
+
+  /* Publish the new array at once: realloc may have moved it, in which case
+     the old pointer is already invalid, and the caller frees arg->names even
+     if the allocation below fails.  */
+  arg->names = names;
+
+  if ((files = realloc (arg->files,
+                        sizeof (ctf_file_t *) * (arg->i + 1))) == NULL)
+    {
+      arg->err = ENOMEM;
+      return;
+    }
+  arg->files = files;
+
+  /* Both arrays have room now: only at this point does the entry count.  */
+  arg->names[arg->i] = (char *) name;
+  arg->files[arg->i] = fp;
+  arg->i++;
+}
+
+/* Write out a CTF archive (if there are per-CU CTF files) or a CTF file
+   (otherwise) into a new dynamically-allocated string, and return it.
+   Members with sizes above THRESHOLD are compressed.
+
+   With no per-CU outputs, the work is handed to ctf_write_mem along with SIZE.
+   Otherwise the archive is written to a temporary file and read back, and
+   *SIZE is set to its length.  If building the archive fails, the error is
+   recorded on FP with ctf_set_errno and NULL is returned.  The temporary file
+   is closed on every path.  */
+unsigned char *
+ctf_link_write (ctf_file_t *fp, size_t *size, size_t threshold)
+{
+  ctf_name_list_accum_cb_arg_t arg;
+  char **names;
+  ctf_file_t **files;
+  FILE *f = NULL;
+  int err;
+  long fsize;
+  const char *errloc;
+  unsigned char *buf = NULL;
+
+  memset (&arg, 0, sizeof (ctf_name_list_accum_cb_arg_t));
+
+  if ((err = ctf_update (fp)) < 0)
+    {
+      errloc = "CTF file construction";
+      goto err;
+    }
+
+  if (fp->ctf_link_outputs)
+    {
+      ctf_dynhash_iter (fp->ctf_link_outputs, ctf_accumulate_archive_names,
+                        &arg);
+      if (arg.err)
+        {
+          errloc = "hash creation";
+          err = arg.err;
+          goto err;
+        }
+    }
+
+  /* No extra outputs? Just write a simple ctf_file_t.  */
+  if (arg.i == 0)
+    return ctf_write_mem (fp, size, threshold);
+
+  /* Writing an archive.  Stick ourselves (the shared repository, parent of all
+     other archives) on the front of it with the default name.  */
+  if ((names = realloc (arg.names, sizeof (char *) * (arg.i + 1))) == NULL)
+    {
+      errloc = "name reallocation";
+      goto err_no;
+    }
+  arg.names = names;
+  memmove (&(arg.names[1]), arg.names, sizeof (char *) * (arg.i));
+  arg.names[0] = (char *) _CTF_SECTION;
+
+  if ((files = realloc (arg.files,
+                        sizeof (struct ctf_file *) * (arg.i + 1))) == NULL)
+    {
+      errloc = "ctf_file reallocation";
+      goto err_no;
+    }
+  arg.files = files;
+  memmove (&(arg.files[1]), arg.files, sizeof (ctf_file_t *) * (arg.i));
+  arg.files[0] = fp;
+
+  if ((f = tmpfile ()) == NULL)
+    {
+      errloc = "tempfile creation";
+      goto err_no;
+    }
+
+  if ((err = ctf_arc_write_fd (fileno (f), arg.files, arg.i + 1,
+                               (const char **) arg.names,
+                               threshold)) < 0)
+    {
+      errloc = "archive writing";
+      goto err;
+    }
+
+  /* Find out how big the archive is, then rewind to read it back in.  */
+  if (fseek (f, 0, SEEK_END) < 0)
+    {
+      errloc = "seeking to end";
+      goto err_no;
+    }
+
+  if ((fsize = ftell (f)) < 0)
+    {
+      errloc = "filesize determination";
+      goto err_no;
+    }
+
+  if (fseek (f, 0, SEEK_SET) < 0)
+    {
+      errloc = "filepos resetting";
+      goto err_no;
+    }
+
+  if ((buf = malloc (fsize)) == NULL)
+    {
+      errloc = "CTF archive buffer allocation";
+      goto err_no;
+    }
+
+  while (!feof (f) && fread (buf, fsize, 1, f) == 0)
+    if (ferror (f))
+      {
+        errloc = "reading archive from temporary file";
+        goto err_no;
+      }
+
+  *size = fsize;
+  free (arg.names);
+  free (arg.files);
+
+  /* The archive is in BUF now: do not leak the temporary file.  */
+  fclose (f);
+  return buf;
+
+ err_no:
+  err = errno;
+ err:
+  free (buf);
+  if (f)
+    fclose (f);
+  free (arg.names);
+  free (arg.files);
+  ctf_dprintf ("Cannot write archive in link: %s failure: %s\n", errloc,
+               ctf_errmsg (err));
+  ctf_set_errno (fp, err);
+  return NULL;
+}
diff --git a/libctf/ctf-open.c b/libctf/ctf-open.c
index ee09ca123b..577681d576 100644
--- a/libctf/ctf-open.c
+++ b/libctf/ctf-open.c
@@ -1599,6 +1599,9 @@ ctf_file_close (ctf_file_t *fp)
 
   ctf_free (fp->ctf_dynbase);
 
+  ctf_dynhash_destroy (fp->ctf_link_inputs);
+  ctf_dynhash_destroy (fp->ctf_link_outputs);
+
   ctf_free (fp->ctf_sxlate);
   ctf_free (fp->ctf_txlate);
   ctf_free (fp->ctf_ptrtab);
--
2.22.0.238.g049a27acdc

Reply | Threaded
Open this post in threaded view
|

[PATCH 15/19] libctf: map from old to corresponding newly-added types in ctf_add_type

Nick Alcock
In reply to this post by Nick Alcock
This lets you call ctf_type_mapping (dest_fp, src_fp, src_type_id)
and get told what type ID the corresponding type has in the target
ctf_file_t.  This works even if it was added by a recursive call, and
because it is stored in the target ctf_file_t it works even if we
had to add one type to multiple ctf_file_t's as part of conflicting
type handling.

We only empty out this mapping at ctf_file_close time, because it is
perfectly valid to call ctf_link repeatedly, or to call it after calling
ctf_file_write: we have to keep tracking these types until the types
themselves are thrown away.

libctf/
        * ctf-impl.h (ctf_file_t): New field ctf_link_type_mapping.
        (struct ctf_link_type_mapping_key): New.
        (ctf_hash_type_mapping_key): Likewise.
        (ctf_hash_eq_type_mapping_key): Likewise.
        (ctf_add_type_mapping): Likewise.
        (ctf_type_mapping): Likewise.
        * ctf-open.c (ctf_file_close): Update accordingly.
        * ctf-create.c (ctf_update): Likewise.
        (ctf_add_type): Populate the mapping.
        * ctf-hash.c (ctf_hash_type_mapping_key): Hash a type mapping key.
        (ctf_hash_eq_type_mapping_key): Check the key for equality.
        (ctf_dynhash_insert): Fix comment typo.
        * ctf-link.c (ctf_add_type_mapping): New.
        (ctf_type_mapping): Likewise.
---
 libctf/ctf-create.c | 15 ++++++-
 libctf/ctf-hash.c   | 24 ++++++++++-
 libctf/ctf-impl.h   | 19 +++++++++
 libctf/ctf-link.c   | 97 +++++++++++++++++++++++++++++++++++++++++++++
 libctf/ctf-open.c   |  1 +
 5 files changed, 153 insertions(+), 3 deletions(-)

diff --git a/libctf/ctf-create.c b/libctf/ctf-create.c
index e3d5b22f17..1a72128878 100644
--- a/libctf/ctf-create.c
+++ b/libctf/ctf-create.c
@@ -457,6 +457,7 @@ ctf_update (ctf_file_t *fp)
   nfp->ctf_specific = fp->ctf_specific;
   nfp->ctf_link_inputs = fp->ctf_link_inputs;
   nfp->ctf_link_outputs = fp->ctf_link_outputs;
+  nfp->ctf_link_type_mapping = fp->ctf_link_type_mapping;
 
   nfp->ctf_snapshot_lu = fp->ctf_snapshots;
 
@@ -468,6 +469,7 @@ ctf_update (ctf_file_t *fp)
   memset (&fp->ctf_dtdefs, 0, sizeof (ctf_list_t));
   fp->ctf_link_inputs = NULL;
   fp->ctf_link_outputs = NULL;
+  fp->ctf_link_type_mapping = NULL;
 
   fp->ctf_dvhash = NULL;
   memset (&fp->ctf_dvdefs, 0, sizeof (ctf_list_t));
@@ -1540,6 +1542,7 @@ ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type)
   ctf_funcinfo_t ctc;
 
   ctf_hash_t *hp;
+  ctf_id_t orig_src_type = src_type;
 
   if (!(dst_fp->ctf_flags & LCTF_RDWR))
     return (ctf_set_errno (dst_fp, ECTF_RDONLY));
@@ -1623,7 +1626,10 @@ ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type)
       if (memcmp (&src_en, &dst_en, sizeof (ctf_encoding_t)) == 0)
  {
   if (kind != CTF_K_SLICE)
-    return dst_type;
+    {
+      ctf_add_type_mapping (src_fp, src_type, dst_fp, dst_type);
+      return dst_type;
+    }
  }
       else
   {
@@ -1681,7 +1687,10 @@ ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type)
       if (match && sroot == droot)
  {
   if (kind != CTF_K_SLICE)
-    return dtd->dtd_type;
+    {
+      ctf_add_type_mapping (src_fp, src_type, dst_fp, dst_type);
+      return dtd->dtd_type;
+    }
  }
       else if (!match && sroot && droot)
  {
@@ -1922,6 +1931,8 @@ ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type)
       return (ctf_set_errno (dst_fp, ECTF_CORRUPT));
     }
 
+  if (dst_type != CTF_ERR)
+    ctf_add_type_mapping (src_fp, orig_src_type, dst_fp, dst_type);
   return dst_type;
 }
 
diff --git a/libctf/ctf-hash.c b/libctf/ctf-hash.c
index 12bd6ef9b9..624e8084ef 100644
--- a/libctf/ctf-hash.c
+++ b/libctf/ctf-hash.c
@@ -82,6 +82,28 @@ ctf_hash_eq_string (const void *a, const void *b)
   return !strcmp((const char *) hep_a->key, (const char *) hep_b->key);
 }
 
+/* Hash a type_mapping_key.  PTR is the ctf_helem_t whose key is the
+   ctf_link_type_mapping_key_t to hash: the container pointer and the type
+   index are hashed separately and then combined.  */
+unsigned int
+ctf_hash_type_mapping_key (const void *ptr)
+{
+  ctf_helem_t *hep = (ctf_helem_t *) ptr;
+  ctf_link_type_mapping_key_t *k = (ctf_link_type_mapping_key_t *) hep->key;
+
+  /* Go via uintptr_t, as the mapping code does when it stores type IDs in the
+     hash, so the integer-to-pointer conversion is well-formed even where a
+     ctf_id_t and a pointer differ in size.  */
+  return htab_hash_pointer (k->cltm_fp)
+    + 59 * htab_hash_pointer ((void *) (uintptr_t) k->cltm_idx);
+}
+
+/* Check two type_mapping_keys for equality.  A and B are the ctf_helem_t's
+   whose keys are to be compared: they match only if both the container
+   pointer and the type index agree.  Returns nonzero on a match.  */
+int
+ctf_hash_eq_type_mapping_key (const void *a, const void *b)
+{
+  ctf_link_type_mapping_key_t *lhs
+    = (ctf_link_type_mapping_key_t *) ((ctf_helem_t *) a)->key;
+  ctf_link_type_mapping_key_t *rhs
+    = (ctf_link_type_mapping_key_t *) ((ctf_helem_t *) b)->key;
+
+  if (lhs->cltm_fp != rhs->cltm_fp)
+    return 0;
+
+  return lhs->cltm_idx == rhs->cltm_idx;
+}
+
 /* The dynhash, used for hashes whose size is not known at creation time. */
 
 /* Free a single ctf_helem.  */
@@ -164,7 +186,7 @@ ctf_dynhash_insert (ctf_dynhash_t *hp, void *key, void *value)
     return errno;
 
   /* We need to keep the key_free and value_free around in each item because the
-     del function has no visiblity into the hash as a whole, only into the
+     del function has no visibility into the hash as a whole, only into the
      individual items.  */
 
   slot->key_free = hp->key_free;
diff --git a/libctf/ctf-impl.h b/libctf/ctf-impl.h
index 088e31c851..0b72866b30 100644
--- a/libctf/ctf-impl.h
+++ b/libctf/ctf-impl.h
@@ -204,6 +204,17 @@ typedef struct ctf_str_atom_ref
   uint32_t *caf_ref; /* A single ref to this string.  */
 } ctf_str_atom_ref_t;
 
+/* The structure used as the key in a ctf_link_type_mapping, which lets the
+   linker machinery determine which type IDs on the input side of a link map to
+   which types on the output side.  (The value is a ctf_id_t: another
+   index, not a type.)  */
+
+typedef struct ctf_link_type_mapping_key
+{
+  ctf_file_t *cltm_fp; /* Container the input type is in.  */
+  ctf_id_t cltm_idx; /* Index of the input type in CLTM_FP.  */
+} ctf_link_type_mapping_key_t;
+
 /* The ctf_file is the structure used to represent a CTF container to library
    clients, who see it only as an opaque pointer.  Modifications can therefore
    be made freely to this structure without regard to client versioning.  The
@@ -268,6 +279,7 @@ struct ctf_file
   ctf_archive_t *ctf_archive;  /* Archive this ctf_file_t came from.  */
   ctf_dynhash_t *ctf_link_inputs; /* Inputs to this link.  */
   ctf_dynhash_t *ctf_link_outputs; /* Additional outputs from this link.  */
+  ctf_dynhash_t *ctf_link_type_mapping; /* Map input types to output types.  */
   char *ctf_tmp_typeslice;  /* Storage for slicing up type names.  */
   size_t ctf_tmp_typeslicelen;  /* Size of the typeslice.  */
   void *ctf_specific;  /* Data for ctf_get/setspecific().  */
@@ -327,10 +339,12 @@ extern const ctf_type_t *ctf_lookup_by_id (ctf_file_t **, ctf_id_t);
 typedef unsigned int (*ctf_hash_fun) (const void *ptr);
 extern unsigned int ctf_hash_integer (const void *ptr);
 extern unsigned int ctf_hash_string (const void *ptr);
+extern unsigned int ctf_hash_type_mapping_key (const void *ptr);
 
 typedef int (*ctf_hash_eq_fun) (const void *, const void *);
 extern int ctf_hash_eq_integer (const void *, const void *);
 extern int ctf_hash_eq_string (const void *, const void *);
+extern int ctf_hash_eq_type_mapping_key (const void *, const void *);
 
 typedef void (*ctf_hash_free_fun) (void *);
 
@@ -370,6 +384,11 @@ extern int ctf_dvd_insert (ctf_file_t *, ctf_dvdef_t *);
 extern void ctf_dvd_delete (ctf_file_t *, ctf_dvdef_t *);
 extern ctf_dvdef_t *ctf_dvd_lookup (const ctf_file_t *, const char *);
 
+extern void ctf_add_type_mapping (ctf_file_t *src_fp, ctf_id_t src_type,
+  ctf_file_t *dst_fp, ctf_id_t dst_type);
+extern ctf_id_t ctf_type_mapping (ctf_file_t *src_fp, ctf_id_t src_type,
+  ctf_file_t **dst_fp);
+
 extern void ctf_decl_init (ctf_decl_t *);
 extern void ctf_decl_fini (ctf_decl_t *);
 extern void ctf_decl_push (ctf_decl_t *, ctf_file_t *, ctf_id_t);
diff --git a/libctf/ctf-link.c b/libctf/ctf-link.c
index 8e0f6389a6..1b1a718786 100644
--- a/libctf/ctf-link.c
+++ b/libctf/ctf-link.c
@@ -21,6 +21,103 @@
 #include <string.h>
 
 /* Type tracking machinery.  */
+
+/* Remember that SRC_TYPE in SRC_FP corresponds to DST_TYPE in DST_FP, as added
+   by ctf_add_type().  Each container/type pair is first translated to the
+   parent container where the type is a parent type and a parent exists, and
+   the type is reduced to an index, so what is recorded relates to the
+   container the type is actually in.  The mapping lives in the destination
+   container.
+
+   Outside controlled circumstances (like linking) it is probably not useful to
+   do more than compare the recorded container pointers, since nothing stops
+   the user closing the source container whenever they want to.
+
+   Running out of memory is handled by silently recording nothing: this is
+   called deep enough in the call stack that doing anything useful is painfully
+   difficult, and the worst consequence is a bit of type duplication anyway.  */
+
+void
+ctf_add_type_mapping (ctf_file_t *src_fp, ctf_id_t src_type,
+                      ctf_file_t *dst_fp, ctf_id_t dst_type)
+{
+  ctf_link_type_mapping_key_t *key;
+
+  if (LCTF_TYPE_ISPARENT (src_fp, src_type) && src_fp->ctf_parent)
+    src_fp = src_fp->ctf_parent;
+  src_type = LCTF_TYPE_TO_INDEX (src_fp, src_type);
+
+  if (LCTF_TYPE_ISPARENT (dst_fp, dst_type) && dst_fp->ctf_parent)
+    dst_fp = dst_fp->ctf_parent;
+  dst_type = LCTF_TYPE_TO_INDEX (dst_fp, dst_type);
+
+  /* The key is structural (container plus index), so this hash needs its own
+     hashing and equality functions.  It is created on first use.  */
+  if (dst_fp->ctf_link_type_mapping == NULL)
+    {
+      dst_fp->ctf_link_type_mapping
+        = ctf_dynhash_create (ctf_hash_type_mapping_key,
+                              ctf_hash_eq_type_mapping_key, free, NULL);
+      if (dst_fp->ctf_link_type_mapping == NULL)
+        return;
+    }
+
+  if ((key = calloc (1, sizeof (*key))) == NULL)
+    return;
+
+  key->cltm_fp = src_fp;
+  key->cltm_idx = src_type;
+
+  /* The destination index is stored directly in the value pointer.  */
+  ctf_dynhash_insert (dst_fp->ctf_link_type_mapping, key,
+                      (void *) (uintptr_t) dst_type);
+}
+
+/* Look up a type mapping: return 0 if none.  The DST_FP is modified to point to
+   the parent if need be.  The ID returned is from the dst_fp's perspective.
+
+   SRC_FP and SRC_TYPE name the input type: they are translated to the parent
+   container and to an index in the same way as in ctf_add_type_mapping.  The
+   container *DST_FP is searched first, then its parent, if it has one.  If
+   nothing is found and *DST_FP has no parent, *DST_FP is left untouched; once
+   the search has moved on to the parent, *DST_FP is set to the parent whether
+   or not anything was found there.  */
+ctf_id_t
+ctf_type_mapping (ctf_file_t *src_fp, ctf_id_t src_type, ctf_file_t **dst_fp)
+{
+  ctf_link_type_mapping_key_t key;
+  ctf_file_t *target_fp = *dst_fp;
+  ctf_id_t dst_type = 0;
+
+  if (LCTF_TYPE_ISPARENT (src_fp, src_type) && src_fp->ctf_parent)
+    src_fp = src_fp->ctf_parent;
+
+  src_type = LCTF_TYPE_TO_INDEX(src_fp, src_type);
+  key.cltm_fp = src_fp;
+  key.cltm_idx = src_type;
+
+  if (target_fp->ctf_link_type_mapping) /* First try the container given.  */
+    dst_type = (uintptr_t) ctf_dynhash_lookup (target_fp->ctf_link_type_mapping,
+       &key);
+
+  if (dst_type != 0)
+    {
+      dst_type = LCTF_INDEX_TO_TYPE (target_fp, dst_type,
+     target_fp->ctf_parent != NULL);
+      *dst_fp = target_fp;
+      return dst_type;
+    }
+
+  if (target_fp->ctf_parent) /* Not there: fall back to the parent, if any.  */
+    target_fp = target_fp->ctf_parent;
+  else
+    return 0;
+
+  if (target_fp->ctf_link_type_mapping)
+    dst_type = (uintptr_t) ctf_dynhash_lookup (target_fp->ctf_link_type_mapping,
+       &key);
+
+  if (dst_type)
+    dst_type = LCTF_INDEX_TO_TYPE (target_fp, dst_type,
+   target_fp->ctf_parent != NULL);
+
+  *dst_fp = target_fp; /* Set even if the parent lookup found nothing.  */
+  return dst_type;
+}
+
 /* Linker machinery.
 
    CTF linking consists of adding CTF archives full of content to be merged into
diff --git a/libctf/ctf-open.c b/libctf/ctf-open.c
index 577681d576..81a068276b 100644
--- a/libctf/ctf-open.c
+++ b/libctf/ctf-open.c
@@ -1601,6 +1601,7 @@ ctf_file_close (ctf_file_t *fp)
 
   ctf_dynhash_destroy (fp->ctf_link_inputs);
   ctf_dynhash_destroy (fp->ctf_link_outputs);
+  ctf_dynhash_destroy (fp->ctf_link_type_mapping);
 
   ctf_free (fp->ctf_sxlate);
   ctf_free (fp->ctf_txlate);
--
2.22.0.238.g049a27acdc

Reply | Threaded
Open this post in threaded view
|

[PATCH 16/19] libctf: add linking of the variable section

Nick Alcock
In reply to this post by Nick Alcock
The compiler describes the name and type of all file-scope variables in
this section.  Merging it at link time requires using the type mapping
added in the previous commit to determine the appropriate type for the
variable in the output, given its type in the input: we check the shared
container first, and if the type doesn't exist there, it must be a
conflicted type in the per-CU child, and the variable should go there
too (no other option is possible: no variable can have a type that is
defined in some *other* CU).

libctf/
        * ctf-link.c (ctf_link_one_variable): New.
        (ctf_link_one_input_archive_member): Call it.
---
 libctf/ctf-link.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/libctf/ctf-link.c b/libctf/ctf-link.c
index 1b1a718786..fe88dd44e9 100644
--- a/libctf/ctf-link.c
+++ b/libctf/ctf-link.c
@@ -262,6 +262,74 @@ ctf_link_one_type (ctf_id_t type, int isroot _libctf_unused_, void *arg_)
 
 /* Link one variable in.  */
 
+static int
+ctf_link_one_variable (const char *name, ctf_id_t type, void *arg_)
+{
+  ctf_link_in_member_cb_arg_t *arg = (ctf_link_in_member_cb_arg_t *) arg_;
+  ctf_dvdef_t *dvd;
+  ctf_id_t dst_type = 0;
+  ctf_file_t *check_fp;
+
+  /* NAME and TYPE describe one variable of the input arg->in_fp, and ARG_ is
+     the ctf_link_in_member_cb_arg_t for this link.  Returns 0 unless no output
+     type corresponds to TYPE, in which case EINVAL is returned.
+
+     In unconflicted link mode, when called on a child, we want to try to merge
+     into the parent first, then the child (if there is one): it must be
+     possible to merge into one of those given valid input.  Look for the type
+     of this variable in the parent.  */
+
+  if (arg->out_fp->ctf_parent)
+    {
+      check_fp = arg->out_fp->ctf_parent;
+
+      dst_type = ctf_type_mapping (arg->in_fp, type, &check_fp);
+      if (dst_type != 0)
+ {
+  /* Got it in the parent.  Is there already a variable of this name in
+     the parent? Does it already refer to the right type?  */
+
+  dvd = ctf_dynhash_lookup (check_fp->ctf_dvhash, name);
+  if (dvd && dvd->dvd_type == dst_type)
+    return 0;
+
+  /* No variable here: we can add it.
+     NOTE(review): the result of ctf_add_variable is not checked.  */
+  if (!dvd)
+    {
+      ctf_add_variable (check_fp, name, dst_type);
+      return 0;
+    }
+ }
+    }
+
+  /* Not in the parent, or conflicted, or no parent at all.  Find the type in
+     the child if necessary, then add it there.  */
+
+  /* This type is from the parent's perspective: childify it.
+     NOTE(review): check_fp is not reassigned on this path, so the final
+     ctf_add_variable below targets whichever container the earlier
+     ctf_type_mapping call left in check_fp -- confirm that is the child.  */
+  if (dst_type != 0 && arg->out_fp->ctf_parent)
+    {
+      dst_type = LCTF_TYPE_TO_INDEX (arg->out_fp->ctf_parent, dst_type);
+      dst_type = LCTF_INDEX_TO_TYPE (arg->out_fp, dst_type, 1);
+    }
+  else
+    {
+      /* Look up the type in the child.  */
+      check_fp = arg->out_fp;
+
+      dst_type = ctf_type_mapping (arg->in_fp, type, &check_fp);
+    }
+
+  /* Type still unknown. Impossible: warn and fail.  */
+  if (dst_type == 0)
+    {
+      ctf_dprintf ("Type %lx from CTF archive member %s, input file %s not "
+   "known in parent while adding variable %s: this should "
+   "never happen.\n", type, arg->arcname, arg->file_name,
+   name);
+      return EINVAL;
+    }
+
+  ctf_add_variable (check_fp, name, dst_type); /* Result not checked.  */
+
+  return 0;
+}
+
 /* Merge every type and variable in this archive member into the link, so we can
    relink things that have already had ld run on them.  We use the archive
    member name, sans any leading '.ctf.', as the CU name for ambiguous types if
@@ -306,6 +374,7 @@ ctf_link_one_input_archive_member (ctf_file_t *in_fp, const char *name, void *ar
   err = ctf_type_iter_all (in_fp, ctf_link_one_type, arg);
 
   if (err == 0)
+    err = ctf_variable_iter (in_fp, ctf_link_one_variable, arg);
   arg->in_input_cu_file = 0;
   free (arg->arcname);
 
--
2.22.0.238.g049a27acdc

Reply | Threaded
Open this post in threaded view
|

[PATCH 17/19] libctf: get rid of a disruptive public include of <sys/param.h>

Nick Alcock
In reply to this post by Nick Alcock
This hoary old header defines things like MAX that users of libctf might
perfectly reasonably define themselves.

The CTF headers do not need it: move it into libctf/ctf-impl.h instead.

include/
        * ctf-api.h (includes): No longer include <sys/param.h>.
libctf/
        * ctf-impl.h (includes): Include <sys/param.h> here.
---
 include/ctf-api.h | 1 -
 libctf/ctf-impl.h | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/ctf-api.h b/include/ctf-api.h
index ac20551917..37a4d817fd 100644
--- a/include/ctf-api.h
+++ b/include/ctf-api.h
@@ -24,7 +24,6 @@
 #ifndef _CTF_API_H
 #define _CTF_API_H
 
-#include <sys/param.h>
 #include <sys/types.h>
 #include <ctf.h>
 #include <zlib.h>
diff --git a/libctf/ctf-impl.h b/libctf/ctf-impl.h
index 0b72866b30..683f395160 100644
--- a/libctf/ctf-impl.h
+++ b/libctf/ctf-impl.h
@@ -22,6 +22,7 @@
 
 #include "config.h"
 #include <errno.h>
+#include <sys/param.h>
 #include "ctf-decls.h"
 #include <ctf-api.h>
 #include <sys/types.h>
--
2.22.0.238.g049a27acdc

Reply | Threaded
Open this post in threaded view
|

[PATCH 18/19 REVIEW] bfd: new functions for getting strings out of a strtab

Nick Alcock
In reply to this post by Nick Alcock
The CTF linking process wants to deduplicate the CTF strtab against the
ELF strtab, for which it needs to know the number of strings in the
strtab and it needs to be able to extract them one by one.

The BFD strtab functions only support returning the
size-or-section-length of the strtab (with _bfd_elf_strtab_size)
and returning the offset (but not string!) and decrementing the refcount
at the same time.

So add new functions _bfd_elf_strtab_len (that just returns the length
in strings of the strtab, never the section size) and _bfd_elf_strtab_str
(which returns the string at a given strtab index, and its offset,
without touching the refcount).

It is probably a mistake to use _bfd_elf_strtab_str in particular before
_bfd_elf_strtab_finalize is called, and will not produce useful output
if you do so.

bfd/
        * elf-strtab.c (_bfd_elf_strtab_len): New.
        (_bfd_elf_strtab_str): Likewise.
        * elf-bfd.h: Declare them.
---
 bfd/elf-bfd.h    |  4 ++++
 bfd/elf-strtab.c | 19 +++++++++++++++++++
 2 files changed, 23 insertions(+)

The bfd bits of this need review, but I hope they will be uncontroversial.
There is no intent to use them to do evil things with string tables, only
to see what is there once ld has laid it out, without being forced to modify
its refcounts in the process.  (See the next commit for its use.)

diff --git a/bfd/elf-bfd.h b/bfd/elf-bfd.h
index 00443e6089..26f35a0f8a 100644
--- a/bfd/elf-bfd.h
+++ b/bfd/elf-bfd.h
@@ -2237,8 +2237,12 @@ extern void _bfd_elf_strtab_restore
   (struct elf_strtab_hash *, void *);
 extern bfd_size_type _bfd_elf_strtab_size
   (struct elf_strtab_hash *);
+extern bfd_size_type _bfd_elf_strtab_len
+  (struct elf_strtab_hash *);
 extern bfd_size_type _bfd_elf_strtab_offset
   (struct elf_strtab_hash *, size_t);
+/* OFFSET must be a bfd_size_type *, matching the definition in elf-strtab.c.  */
+extern const char * _bfd_elf_strtab_str
+  (struct elf_strtab_hash *, size_t idx, bfd_size_type *offset);
 extern bfd_boolean _bfd_elf_strtab_emit
   (bfd *, struct elf_strtab_hash *);
 extern void _bfd_elf_strtab_finalize
diff --git a/bfd/elf-strtab.c b/bfd/elf-strtab.c
index ec9002fae7..5d3eac7b40 100644
--- a/bfd/elf-strtab.c
+++ b/bfd/elf-strtab.c
@@ -270,6 +270,12 @@ _bfd_elf_strtab_size (struct elf_strtab_hash *tab)
   return tab->sec_size ? tab->sec_size : tab->size;
 }
 
+bfd_size_type
+_bfd_elf_strtab_len (struct elf_strtab_hash *tab)
+{
+  return tab->size; /* Unlike _bfd_elf_strtab_size, never the section size.  */
+}
+
 bfd_size_type
 _bfd_elf_strtab_offset (struct elf_strtab_hash *tab, size_t idx)
 {
@@ -285,6 +291,19 @@ _bfd_elf_strtab_offset (struct elf_strtab_hash *tab, size_t idx)
   return tab->array[idx]->u.index;
 }
 
+const char *
+_bfd_elf_strtab_str (struct elf_strtab_hash *tab, size_t idx,
+                     bfd_size_type *offset)
+{
+  if (idx == 0)
+    return 0;
+  BFD_ASSERT (idx < tab->size);
+  BFD_ASSERT (tab->sec_size);
+  if (offset)
+    *offset = tab->array[idx]->u.index;
+  return tab->array[idx]->root.string;
+}
+
 bfd_boolean
 _bfd_elf_strtab_emit (register bfd *abfd, struct elf_strtab_hash *tab)
 {
--
2.22.0.238.g049a27acdc

Reply | Threaded
Open this post in threaded view
|

[PATCH 19/19 REVIEW RFC] bfd, ld: add CTF section linking

Nick Alcock
In reply to this post by Nick Alcock
This is quite complicated because the CTF section's contents depend on
the final contents of the symtab and strtab, because it has two sections
whose contents are shuffled to be in 1:1 correspondence with the symtab,
and an internal strtab that gets deduplicated against the ELF strtab
(with offsets adjusted to point into the ELF strtab instead).  It is
also compressed if large enough, so its size depends on its contents!

So we cannot construct it as early as most sections: we cannot even
*begin* construction until after the symtab and strtab are finalized.
Thankfully there is already one kind of section treated similarly:
compressed debugging sections.  The only differences are that compressed
debugging sections have extra handling to deal with their changing name if
compressed (CTF sections are always called ".ctf" for now, though we
have reserved ".ctf.*" against future use), and that compressed
debugging sections have previously-uncompressed content which has to be
stashed away for later compression, while CTF sections have no content
at all until we generate it (very late).

BFD also cannot do the link itself: libctf knows how to do it, and BFD
cannot call libctf directly because libctf already depends on bfd for
file I/O.  So we have to use a pair of callbacks, one, examine_strtab,
which allows a caller to examine the symtab and strtab after
finalization (called from elf_link_swap_symbols_out(), right before the
symtabs are written, and after the strtab has been finalized), and one
which actually does the emission (called emit_ctf simply because it is
grouped with a bunch of section-specific late-emission function calls at
the bottom of bfd_elf_final_link, and a section-specific name seems best
for that).

Caveats and things I want help with / careful review of, oh are there a
lot of them! I struggled with this code for ages and this is not
particularly pretty: it's just the first thing I hit on that happened to
work.  On ELF.  It's still broken on non-ELF :( fixing that is my next
priority, but I thought I should throw this out there first in any case.
I've been silent for too long.

 - There are no SEC_* flags left, and we do not have a SHT_ type for CTF
   sections yet -- so we are reduced to doing string comparisons to
   determine whether we have to do special things for the CTF section.
   Nearly all of these things apply to any such "late-generated
   section", but in the absence of any remaining SEC_ flags I cannot say
   that.  Does anyone know if any of these flags are unused, or if we
   can make the flags word larger, or something?

 - The naming of functions in ldlang.c is opaque: some are ldlang_* and
   some are lang_*.  I've split the difference so things are named
   similarly to the functions called near them in lang_process, but this
   feels... wrong.  Is there any consistency here?

 - There is an arbitrary threshold value above which CTF sections are
   compressed (roughly: it is actually the threshold above which
   ctf_file_t's are compressed, and a CTF section may be comprised of
   many of these in a ctf_archive_t).  Right now I have arbitrarily
   hardwired this threshold at 4096 bytes.  Should it be configurable?
   Is a somewhat bigger value saner, on the grounds that wasting space
   on compression dictionaries for 4KiB files is just nuts? (64KiB is
   probably too big...)

 - We might well have memory leaks: we open the CTF sections for the
   input files and then never close them again.  Should we? ld does seem
   to operate on the basis that input files live forever so it doesn't
   matter if they are never freed...

 - Is the whole "set the length of the input CTF sections to zero to force
   their contents to not get copied, then forcibly emit them and jam their
   sizes in in emit_ctf/ldlang_write_ctf/
   _bfd_elf_assign_file_positions_for_non_load" thing too ugly to live?  I'm
   fairly happy with turning on SEC_NEVER_LOAD in the code rather than in
   the linker script simply because loading the thing via the ELF loader is
   almost certainly always a mistake (we already have functions to load CTF,
   and it's not done that way).  But is that *also* too ugly to live?  I
   don't know.

 - I'm fairly unhappy that I have to modify the default linker script,
   because that means that any project wanting CTF support and providing its
   own linker script will have to change -- including the Linux kernel, a
   major existing user (albeit not upstream yet, so that isn't so much of a
   problem: I can just make that change myself).  But without doing this, we
   never get an input->output mapping and the CTF sections are simply never
   emitted.  This seems to be because non-loaded sections are simply thrown
   away in the elf32.em orphan-assignment code: but even if it might be
   desirable, we *cannot* make this a loaded section, since that would
   require its size and position to be computed before the strtab is laid
   out, while the strtab dedup and compression requires us to compute it
   afterwards.  So we could avoid modifying the linker script by modifying
   the ELF orphan-assignment code, I suppose, but I have no idea which might
   be preferable.

 - I don't know if it's kosher for me to call _bfd_elf_strtab_str from ldlang.c:
   what will happen on non-ELF-targeting binutils?  Maybe this has to go
   somewhere into the ELF emulation code, so that it is only linked in when ELF
   is targeted.  I guess this means the callback stays NULL and is populated from
   the ELF emulation somewhere?  I'm not sure...

 - Equally, at the moment all the CTF emission stuff is invoked from callbacks
   from late in ELF linking, because on ELF we have to emit the section contents
   so late.  I suppose we should do something different on non-ELF platforms
   (which don't try to deduplicate the strtabs or anything like that, so the
   emission could be done much earlier), but I really have no idea where one
   might put such a thing: right after the call to lang_merge_ctf in
   lang_process perhaps?  Whatever it is, it shouldn't kick in on ELF
   platforms, but I don't want to end up duplicating code for every non-ELF
   platform either. (Note that the callback used to do the final CTF emission on
   ELF is in *BFD*, not in ld's per-target emulation code).

   ldlang_ctf_apply_strsym probably also needs to move into the emulation, or
   needs to turn into a straight call into a function which is only non-empty in
   the ELF emulation: at the moment, because it's just in ldlang.c, we're
   breaking non-ELF builds.  I'll do that in the next rev of this patch.


        * Makefile.def (dependencies): all-ld depends on all-libctf.
        * Makefile.in: Regenerated.

include/
        * bfdlink.h (elf_strtab_hash): New forward.
        (elf_sym_strtab): Likewise.
        (struct bfd_link_callbacks <examine_strtab>): New.
        (struct bfd_link_callbacks <emit_ctf>): Likewise.

bfd/
        * elf-bfd.h (SECTION_IS_CTF): New macro.
        * elf.c (special_sections_c): Add ".ctf".
        (assign_file_positions_for_non_load_sections): Note that
        compressed debugging sections etc are not assigned here.
        Treat CTF sections like SEC_ELF_COMPRESS sections: sh_offset -1.
        (assign_file_positions_except_relocs): Likewise.
        (find_section_in_list): Note that debugging and CTF sections, as
        well as reloc sections, are assigned later.
        (_bfd_elf_assign_file_positions_for_non_load): CTF sections get
        their size and contents updated.
        (_bfd_elf_set_section_contents): Skip CTF sections: unlike
        compressed sections, they have no uncompressed content to copy at
        this stage.
        * elflink.c (elf_link_swap_symbols_out): Call the examine_strtab
        callback right before the strtab is written out.
        (bfd_elf_final_link): Don't cache the section contents of CTF
        sections: they are not populated yet.  Call the emit_ctf callback
        right at the end, after all the symbols and strings are flushed
        out.
ld/
        * scripttempl/DWARF.sc: Add .ctf.
        * ldlang.h (includes): Add elf-bfd.h, ctf-api.h.  Prevent NAME in
        elf-bfd.h from wreaking havoc.
        (struct lang_input_statement_struct): Add the_ctf.
        (ldlang_ctf_apply_strsym): Declare.
        (ldlang_write_ctf): Likewise.
        * ldmain.c (link_callbacks): Add ldlang_ctf_apply_strsym,
        ldlang_write_ctf.
        * ldlang.c (includes): elf-bfd.h is now included by ldlang.h.
        Include elf/internal.h.
        (ctf_output): New. Initialized in...
        (ldlang_open_ctf): ... this new function.  Open all the CTF
        sections in the input files: mark them non-loaded and empty
        so as not to copy their contents to the output.
        (ldlang_merge_ctf): New, merge types via ctf_link_add_ctf and
        ctf_link.
        (ldlang_ctf_apply_strsym): New, an examine_strtab callback: tell
        libctf about the symtab and strtab.
        (struct ldlang_ctf_strsym_iter_cb_arg): New, state to do so.
        (ldlang_ctf_strtab_iter_cb): New: tell libctf about each string in
        the strtab in turn.
        (ldlang_ctf_symbols_iter_cb): New, tell libctf about each symbol
        in the symtab in turn.
        (ldlang_write_ctf): Write out the CTF section.
        (lang_process): Call ldlang_open_ctf() and lang_merge_ctf().

        * Makefile.am: Pull in libctf (and zlib, a transitive requirement
        for compressed CTF section emission).  Pass it on to DejaGNU.
        * configure.ac: Add AM_ZLIB.
        * aclocal.m4: Added zlib.m4.
        * Makefile.in: Regenerated.
        * testsuite/ld-bootstrap/bootstrap.exp: Use it when relinking ld.
---
 ChangeLog                               |   5 +
 Makefile.def                            |   1 +
 Makefile.in                             |   7 +
 bfd/elf-bfd.h                           |   6 +-
 bfd/elf.c                               |  45 +++--
 bfd/elflink.c                           |  55 +++---
 include/bfdlink.h                       |  15 ++
 ld/Makefile.am                          |  15 +-
 ld/Makefile.in                          |  23 ++-
 ld/aclocal.m4                           |   1 +
 ld/configure                            |  28 ++-
 ld/configure.ac                         |   4 +
 ld/ldlang.c                             | 232 +++++++++++++++++++++++-
 ld/ldlang.h                             |  13 ++
 ld/ldmain.c                             |   4 +-
 ld/scripttempl/DWARF.sc                 |   3 +
 ld/testsuite/ld-bootstrap/bootstrap.exp |   8 +-
 17 files changed, 412 insertions(+), 53 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 52d97d5fb8..8569bcf5ef 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2019-07-13  Nick Alcock  <[hidden email]>
+
+ * Makefile.def (dependencies): all-ld depends on all-libctf.
+ * Makefile.in: Regenerated.
+
 2019-07-13  Joel Brobecker  <[hidden email]>
 
  * src-release (getver): If $tool/gdbsupport/create-version.sh
diff --git a/Makefile.def b/Makefile.def
index 28bf61d771..e887f498f4 100644
--- a/Makefile.def
+++ b/Makefile.def
@@ -432,6 +432,7 @@ dependencies = { module=all-binutils; on=all-build-bison; };
 dependencies = { module=all-binutils; on=all-intl; };
 dependencies = { module=all-binutils; on=all-gas; };
 dependencies = { module=all-binutils; on=all-libctf; };
+dependencies = { module=all-ld; on=all-libctf; };
 
 // We put install-opcodes before install-binutils because the installed
 // binutils might be on PATH, and they might need the shared opcodes
diff --git a/Makefile.in b/Makefile.in
index 7a6700af96..eeba51e829 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -51159,6 +51159,13 @@ all-stage3-binutils: maybe-all-stage3-libctf
 all-stage4-binutils: maybe-all-stage4-libctf
 all-stageprofile-binutils: maybe-all-stageprofile-libctf
 all-stagefeedback-binutils: maybe-all-stagefeedback-libctf
+all-ld: maybe-all-libctf
+all-stage1-ld: maybe-all-stage1-libctf
+all-stage2-ld: maybe-all-stage2-libctf
+all-stage3-ld: maybe-all-stage3-libctf
+all-stage4-ld: maybe-all-stage4-libctf
+all-stageprofile-ld: maybe-all-stageprofile-libctf
+all-stagefeedback-ld: maybe-all-stagefeedback-libctf
 install-binutils: maybe-install-opcodes
 install-strip-binutils: maybe-install-strip-opcodes
 install-opcodes: maybe-install-bfd
diff --git a/bfd/elf-bfd.h b/bfd/elf-bfd.h
index 26f35a0f8a..823e83eafb 100644
--- a/bfd/elf-bfd.h
+++ b/bfd/elf-bfd.h
@@ -2242,7 +2242,7 @@ extern bfd_size_type _bfd_elf_strtab_len
 extern bfd_size_type _bfd_elf_strtab_offset
   (struct elf_strtab_hash *, size_t);
 extern const char * _bfd_elf_strtab_str
-  (struct elf_strtab_hash *, size_t idx, size_t *offset);
+  (struct elf_strtab_hash *, size_t idx, bfd_size_type *offset);
 extern bfd_boolean _bfd_elf_strtab_emit
   (bfd *, struct elf_strtab_hash *);
 extern void _bfd_elf_strtab_finalize
@@ -2901,6 +2901,10 @@ extern asection _bfd_elf_large_com_section;
  || (H)->start_stop \
  || ((INFO)->dynamic && !(H)->dynamic)))
 
+/* Determine if a section contains CTF data, using its name.  */
+#define SECTION_IS_CTF(name) \
+ (strcmp ((name), ".ctf") == 0 || strncmp ((name), ".ctf.", 5) == 0)
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/bfd/elf.c b/bfd/elf.c
index 1c843327cf..f78a3a7fc4 100644
--- a/bfd/elf.c
+++ b/bfd/elf.c
@@ -2619,6 +2619,7 @@ static const struct bfd_elf_special_section special_sections_b[] =
 static const struct bfd_elf_special_section special_sections_c[] =
 {
   { STRING_COMMA_LEN (".comment"), 0, SHT_PROGBITS, 0 },
+  { STRING_COMMA_LEN (".ctf"), 0, SHT_PROGBITS,    0 },
   { NULL, 0, 0, 0,    0 }
 };
 
@@ -5839,7 +5840,8 @@ is_debuginfo_file (bfd *abfd)
   return TRUE;
 }
 
-/* Assign file positions for the other sections.  */
+/* Assign file positions for the other sections, except for compressed debugging
+   and other sections assigned in _bfd_elf_assign_file_positions_for_non_load().  */
 
 static bfd_boolean
 assign_file_positions_for_non_load_sections (bfd *abfd,
@@ -5899,8 +5901,10 @@ assign_file_positions_for_non_load_sections (bfd *abfd,
       else if (((hdr->sh_type == SHT_REL || hdr->sh_type == SHT_RELA)
  && hdr->bfd_section == NULL)
        || (hdr->bfd_section != NULL
-   && (hdr->bfd_section->flags & SEC_ELF_COMPRESS))
-   /* Compress DWARF debug sections.  */
+   && (hdr->bfd_section->flags & SEC_ELF_COMPRESS
+                       || SECTION_IS_CTF (hdr->bfd_section->name)))
+   /* We don't know the offset of these sections yet: their size
+                      has not been decided.  */
        || hdr == i_shdrpp[elf_onesymtab (abfd)]
        || (elf_symtab_shndx_list (abfd) != NULL
    && hdr == i_shdrpp[elf_symtab_shndx_list (abfd)->ndx])
@@ -6168,11 +6172,12 @@ find_section_in_list (unsigned int i, elf_section_list * list)
    VMAs must be known before this is called.
 
    Reloc sections come in two flavours: Those processed specially as
-   "side-channel" data attached to a section to which they apply, and
-   those that bfd doesn't process as relocations.  The latter sort are
-   stored in a normal bfd section by bfd_section_from_shdr.   We don't
-   consider the former sort here, unless they form part of the loadable
-   image.  Reloc sections not assigned here will be handled later by
+   "side-channel" data attached to a section to which they apply, and those that
+   bfd doesn't process as relocations.  The latter sort are stored in a normal
+   bfd section by bfd_section_from_shdr.  We don't consider the former sort
+   here, unless they form part of the loadable image.  Reloc sections not
+   assigned here (and compressed debugging sections and CTF sections which
+   nothing else in the file can rely upon) will be handled later by
    assign_file_positions_for_relocs.
 
    We also don't set the positions of the .symtab and .strtab here.  */
@@ -6208,8 +6213,10 @@ assign_file_positions_except_relocs (bfd *abfd,
   if (((hdr->sh_type == SHT_REL || hdr->sh_type == SHT_RELA)
        && hdr->bfd_section == NULL)
       || (hdr->bfd_section != NULL
-  && (hdr->bfd_section->flags & SEC_ELF_COMPRESS))
-  /* Compress DWARF debug sections.  */
+                  && (hdr->bfd_section->flags & SEC_ELF_COMPRESS
+                       || SECTION_IS_CTF (hdr->bfd_section->name)))
+              /* Do not assign offsets for these sections yet: we don't know
+                 their sizes.  */
       || i == elf_onesymtab (abfd)
       || (elf_symtab_shndx_list (abfd) != NULL
   && hdr == i_shdrpp[elf_symtab_shndx_list (abfd)->ndx])
@@ -6417,12 +6424,14 @@ _bfd_elf_assign_file_positions_for_non_load (bfd *abfd)
   asection *sec = shdrp->bfd_section;
   bfd_boolean is_rel = (shdrp->sh_type == SHT_REL
  || shdrp->sh_type == SHT_RELA);
+          bfd_boolean is_ctf = sec && SECTION_IS_CTF (sec->name);
   if (is_rel
+              || is_ctf
       || (sec != NULL && (sec->flags & SEC_ELF_COMPRESS)))
     {
-      if (!is_rel)
+      if (!is_rel && !is_ctf)
  {
-  const char *name = sec->name;
+                  const char *name = sec->name;
   struct bfd_elf_section_data *d;
 
   /* Compress DWARF debug sections.  */
@@ -6466,6 +6475,13 @@ _bfd_elf_assign_file_positions_for_non_load (bfd *abfd)
   shdrp->contents = sec->contents;
   shdrp->bfd_section->contents = NULL;
  }
+              else if (is_ctf)
+                {
+  /* Update section size and contents.  */
+  shdrp->sh_size = sec->size;
+  shdrp->contents = sec->contents;
+                }
+
       off = _bfd_elf_assign_file_position_for_section (shdrp,
        off,
        TRUE);
@@ -9045,6 +9061,11 @@ _bfd_elf_set_section_contents (bfd *abfd,
   hdr = &elf_section_data (section)->this_hdr;
   if (hdr->sh_offset == (file_ptr) -1)
     {
+      if (SECTION_IS_CTF (section->name))
+        /* Nothing to do with this section: the contents are generated
+           later.  */
+        return TRUE;
+
       /* We must compress this section.  Write output to the buffer.  */
       unsigned char *contents = hdr->contents;
       if ((offset + count) > hdr->sh_size
diff --git a/bfd/elflink.c b/bfd/elflink.c
index 9175d3fa20..258c2408bb 100644
--- a/bfd/elflink.c
+++ b/bfd/elflink.c
@@ -9507,6 +9507,15 @@ elf_link_swap_symbols_out (struct elf_final_link_info *flinfo)
  + elfsym->destshndx_index));
     }
 
+  /* Allow the linker to examine the strtab and symtab now they are
+     populated.  */
+
+  if (flinfo->info->callbacks->examine_strtab)
+    flinfo->info->callbacks->examine_strtab (hash_table->strtab,
+                                             hash_table->strtabcount,
+                                             flinfo->symstrtab);
+
+
   hdr = &elf_tdata (flinfo->output_bfd)->symtab_hdr;
   pos = hdr->sh_offset + hdr->sh_size;
   amt = hash_table->strtabcount * bed->s->sizeof_sym;
@@ -11779,7 +11788,7 @@ bfd_elf_final_link (bfd *abfd, struct bfd_link_info *info)
 
   /* The object attributes have been merged.  Remove the input
      sections from the link, and set the contents of the output
-     secton.  */
+     section.  */
   std_attrs_section = get_elf_backend_data (abfd)->obj_attrs_section;
   for (o = abfd->sections; o != NULL; o = o->next)
     {
@@ -12001,26 +12010,27 @@ bfd_elf_final_link (bfd *abfd, struct bfd_link_info *info)
       esdo->rel.count = 0;
       esdo->rela.count = 0;
 
-      if (esdo->this_hdr.sh_offset == (file_ptr) -1)
- {
-  /* Cache the section contents so that they can be compressed
-     later.  Use bfd_malloc since it will be freed by
-     bfd_compress_section_contents.  */
-  unsigned char *contents = esdo->this_hdr.contents;
-  if ((o->flags & SEC_ELF_COMPRESS) == 0 || contents != NULL)
-    abort ();
-  contents
-    = (unsigned char *) bfd_malloc (esdo->this_hdr.sh_size);
-  if (contents == NULL)
-    goto error_return;
-  esdo->this_hdr.contents = contents;
- }
-    }
-
-  /* We have now assigned file positions for all the sections except
-     .symtab, .strtab, and non-loaded reloc sections.  We start the
-     .symtab section at the current file position, and write directly
-     to it.  We build the .strtab section in memory.  */
+      if ((esdo->this_hdr.sh_offset == (file_ptr) -1)
+          && !SECTION_IS_CTF (o->name))
+        {
+          /* Cache the section contents so that they can be compressed
+             later.  Use bfd_malloc since it will be freed by
+             bfd_compress_section_contents.  */
+          unsigned char *contents = esdo->this_hdr.contents;
+          if ((o->flags & SEC_ELF_COMPRESS) == 0 || contents != NULL)
+            abort ();
+          contents
+            = (unsigned char *) bfd_malloc (esdo->this_hdr.sh_size);
+          if (contents == NULL)
+            goto error_return;
+          esdo->this_hdr.contents = contents;
+        }
+    }
+
+  /* We have now assigned file positions for all the sections except .symtab,
+     .strtab, and non-loaded reloc and compressed debugging sections.  We start
+     the .symtab section at the current file position, and write directly to it.
+     We build the .strtab section in memory.  */
   bfd_get_symcount (abfd) = 0;
   symtab_hdr = &elf_tdata (abfd)->symtab_hdr;
   /* sh_name is set in prep_headers.  */
@@ -12806,6 +12816,9 @@ bfd_elf_final_link (bfd *abfd, struct bfd_link_info *info)
   if (! _bfd_elf_write_section_eh_frame_hdr (abfd, info))
     goto error_return;
 
+  if (info->callbacks->emit_ctf)
+      info->callbacks->emit_ctf ();
+
   elf_final_link_free (abfd, &flinfo);
 
   elf_linker (abfd) = TRUE;
diff --git a/include/bfdlink.h b/include/bfdlink.h
index c35469dd11..ca24ae7fbf 100644
--- a/include/bfdlink.h
+++ b/include/bfdlink.h
@@ -630,6 +630,11 @@ struct bfd_link_info
   struct bfd_elf_version_tree *version_info;
 };
 
+/* Some forward-definitions used by some callbacks.  */
+
+struct elf_strtab_hash;
+struct elf_sym_strtab;
+
 /* This structures holds a set of callback functions.  These are called
    by the BFD linker routines.  */
 
@@ -751,6 +756,16 @@ struct bfd_link_callbacks
     (struct bfd_link_info *, bfd * abfd,
      asection * current_section, asection * previous_section,
      bfd_boolean new_segment);
+  /* This callback provides a chance for callers of the BFD to examine the
+     ELF string table and symbol table once they are complete and indexes and
+     offsets assigned.  */
+  void (*examine_strtab)
+    (struct elf_sym_strtab *syms, bfd_size_type symcount,
+     struct elf_strtab_hash *symstrtab);
+  /* This callback should emit the CTF section into a non-loadable section in
+     the output BFD named .ctf or a name beginning with ".ctf.".  */
+  void (*emit_ctf)
+    (void);
 };
 
 /* The linker builds link_order structures which tell the code how to
diff --git a/ld/Makefile.am b/ld/Makefile.am
index 0509c2e50f..6b2453a240 100644
--- a/ld/Makefile.am
+++ b/ld/Makefile.am
@@ -34,6 +34,12 @@ LEX = `if [ -f ../flex/flex ]; then echo ../flex/flex; else echo @LEX@; fi`
 am__skiplex =
 am__skipyacc =
 
+# This is where we get zlib from.  zlibdir is -L../zlib and zlibinc is
+# -I../zlib, unless we were configured with --with-system-zlib, in which
+# case both are empty.
+ZLIB = @zlibdir@ -lz
+ZLIBINC = @zlibinc@
+
 ELF_CLFAGS=-DELF_LIST_OPTIONS=@elf_list_options@ \
    -DELF_SHLIB_LIST_OPTIONS=@elf_shlib_list_options@ \
    -DELF_PLT_UNWIND_LIST_OPTIONS=@elf_plt_unwind_list_options@
@@ -145,12 +151,13 @@ AM_MAKEINFOFLAGS = -I $(srcdir) -I $(BFDDIR)/doc -I ../bfd/doc \
 TEXI2DVI = texi2dvi -I $(srcdir) -I $(BFDDIR)/doc -I ../bfd/doc \
     -I $(top_srcdir)/../libiberty
 
-AM_CPPFLAGS = -I. -I$(srcdir) -I../bfd -I$(BFDDIR) -I$(INCDIR) \
+AM_CPPFLAGS = -I. -I$(srcdir) -I../bfd -I$(BFDDIR) -I$(INCDIR) @zlibinc@ \
  @INCINTL@ $(HDEFINES) $(CFLAGS) $(PLUGIN_CFLAGS) \
  -DLOCALEDIR="\"$(datadir)/locale\""
 
 BFDLIB = ../bfd/libbfd.la
 LIBIBERTY = ../libiberty/libiberty.a
+LIBCTF = ../libctf/libctf.a
 
 # These all start with e so 'make clean' can find them.
 ALL_EMULATION_SOURCES = \
@@ -957,8 +964,8 @@ ld_new_SOURCES = ldgram.y ldlex-wrapper.c lexsup.c ldlang.c mri.c ldctor.c ldmai
  ldwrite.c ldexp.c ldemul.c ldver.c ldmisc.c ldfile.c ldcref.c $(PLUGIN_C) \
  ldbuildid.c
 ld_new_DEPENDENCIES = $(EMULATION_OFILES) $(EMUL_EXTRA_OFILES) \
-      $(BFDLIB) $(LIBIBERTY) $(LIBINTL_DEP)
-ld_new_LDADD = $(EMULATION_OFILES) $(EMUL_EXTRA_OFILES) $(BFDLIB) $(LIBIBERTY) $(LIBINTL)
+      $(BFDLIB) $(LIBCTF) $(LIBIBERTY) $(LIBINTL_DEP)
+ld_new_LDADD = $(EMULATION_OFILES) $(EMUL_EXTRA_OFILES) $(BFDLIB) $(LIBCTF) $(LIBIBERTY) $(LIBINTL) $(ZLIB)
 
 # Dependency tracking for the generated emulation files.
 EXTRA_ld_new_SOURCES += $(ALL_EMULATION_SOURCES) $(ALL_64_EMULATION_SOURCES)
@@ -977,7 +984,7 @@ check-DEJAGNU: site.exp
  CC="$(CC_FOR_TARGET)" CFLAGS="$(CFLAGS_FOR_TARGET)" \
  CXX="$(CXX_FOR_TARGET)" CXXFLAGS="$(CXXFLAGS_FOR_TARGET)" \
  CC_FOR_HOST="$(CC)" CFLAGS_FOR_HOST="$(CFLAGS)" \
- OFILES="$(OFILES)" BFDLIB="$(TESTBFDLIB)" \
+ OFILES="$(OFILES)" BFDLIB="$(TESTBFDLIB)" LIBCTF="$(LIBCTF) $(ZLIB)" \
  LIBIBERTY="$(LIBIBERTY) $(LIBINTL)" LIBS="$(LIBS)" \
  DO_COMPARE="`echo '$(do_compare)' | sed -e 's,\\$$,,g'`" \
  $(RUNTESTFLAGS); \
diff --git a/ld/Makefile.in b/ld/Makefile.in
index 9898392a77..6e7212dbf9 100644
--- a/ld/Makefile.in
+++ b/ld/Makefile.in
@@ -122,9 +122,9 @@ am__aclocal_m4_deps = $(top_srcdir)/../bfd/acinclude.m4 \
  $(top_srcdir)/../config/plugins.m4 \
  $(top_srcdir)/../config/po.m4 \
  $(top_srcdir)/../config/progtest.m4 \
- $(top_srcdir)/../libtool.m4 $(top_srcdir)/../ltoptions.m4 \
- $(top_srcdir)/../ltsugar.m4 $(top_srcdir)/../ltversion.m4 \
- $(top_srcdir)/../lt~obsolete.m4 \
+ $(top_srcdir)/../config/zlib.m4 $(top_srcdir)/../libtool.m4 \
+ $(top_srcdir)/../ltoptions.m4 $(top_srcdir)/../ltsugar.m4 \
+ $(top_srcdir)/../ltversion.m4 $(top_srcdir)/../lt~obsolete.m4 \
  $(top_srcdir)/../bfd/version.m4 $(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
  $(ACLOCAL_M4)
@@ -534,6 +534,8 @@ top_build_prefix = @top_build_prefix@
 top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
 use_sysroot = @use_sysroot@
+zlibdir = @zlibdir@
+zlibinc = @zlibinc@
 AUTOMAKE_OPTIONS = dejagnu no-texinfo.tex no-dist foreign info-in-builddir
 ACLOCAL_AMFLAGS = -I .. -I ../config -I ../bfd
 TEXINFO_TEX = ../texinfo/texinfo.tex
@@ -544,6 +546,12 @@ tooldir = $(exec_prefix)/$(target_alias)
 # maintainer mode is disabled.  Avoid this.
 am__skiplex =
 am__skipyacc =
+
+# This is where we get zlib from.  zlibdir is -L../zlib and zlibinc is
+# -I../zlib, unless we were configured with --with-system-zlib, in which
+# case both are empty.
+ZLIB = @zlibdir@ -lz
+ZLIBINC = @zlibinc@
 ELF_CLFAGS = -DELF_LIST_OPTIONS=@elf_list_options@ \
    -DELF_SHLIB_LIST_OPTIONS=@elf_shlib_list_options@ \
    -DELF_PLT_UNWIND_LIST_OPTIONS=@elf_plt_unwind_list_options@
@@ -632,12 +640,13 @@ AM_MAKEINFOFLAGS = -I $(srcdir) -I $(BFDDIR)/doc -I ../bfd/doc \
 TEXI2DVI = texi2dvi -I $(srcdir) -I $(BFDDIR)/doc -I ../bfd/doc \
     -I $(top_srcdir)/../libiberty
 
-AM_CPPFLAGS = -I. -I$(srcdir) -I../bfd -I$(BFDDIR) -I$(INCDIR) \
+AM_CPPFLAGS = -I. -I$(srcdir) -I../bfd -I$(BFDDIR) -I$(INCDIR) @zlibinc@ \
  @INCINTL@ $(HDEFINES) $(CFLAGS) $(PLUGIN_CFLAGS) \
  -DLOCALEDIR="\"$(datadir)/locale\""
 
 BFDLIB = ../bfd/libbfd.la
 LIBIBERTY = ../libiberty/libiberty.a
+LIBCTF = ../libctf/libctf.a
 
 # These all start with e so 'make clean' can find them.
 ALL_EMULATION_SOURCES = \
@@ -998,9 +1007,9 @@ ld_new_SOURCES = ldgram.y ldlex-wrapper.c lexsup.c ldlang.c mri.c ldctor.c ldmai
  ldbuildid.c
 
 ld_new_DEPENDENCIES = $(EMULATION_OFILES) $(EMUL_EXTRA_OFILES) \
-      $(BFDLIB) $(LIBIBERTY) $(LIBINTL_DEP)
+      $(BFDLIB) $(LIBCTF) $(LIBIBERTY) $(LIBINTL_DEP)
 
-ld_new_LDADD = $(EMULATION_OFILES) $(EMUL_EXTRA_OFILES) $(BFDLIB) $(LIBIBERTY) $(LIBINTL)
+ld_new_LDADD = $(EMULATION_OFILES) $(EMUL_EXTRA_OFILES) $(BFDLIB) $(LIBCTF) $(LIBIBERTY) $(LIBINTL) $(ZLIB)
 #
 #
 # Build a dummy plugin using libtool.
@@ -2560,7 +2569,7 @@ check-DEJAGNU: site.exp
  CC="$(CC_FOR_TARGET)" CFLAGS="$(CFLAGS_FOR_TARGET)" \
  CXX="$(CXX_FOR_TARGET)" CXXFLAGS="$(CXXFLAGS_FOR_TARGET)" \
  CC_FOR_HOST="$(CC)" CFLAGS_FOR_HOST="$(CFLAGS)" \
- OFILES="$(OFILES)" BFDLIB="$(TESTBFDLIB)" \
+ OFILES="$(OFILES)" BFDLIB="$(TESTBFDLIB)" LIBCTF="$(LIBCTF) $(ZLIB)" \
  LIBIBERTY="$(LIBIBERTY) $(LIBINTL)" LIBS="$(LIBS)" \
  DO_COMPARE="`echo '$(do_compare)' | sed -e 's,\\$$,,g'`" \
  $(RUNTESTFLAGS); \
diff --git a/ld/aclocal.m4 b/ld/aclocal.m4
index 4408082888..7df8bf68f1 100644
--- a/ld/aclocal.m4
+++ b/ld/aclocal.m4
@@ -1198,6 +1198,7 @@ m4_include([../config/override.m4])
 m4_include([../config/plugins.m4])
 m4_include([../config/po.m4])
 m4_include([../config/progtest.m4])
+m4_include([../config/zlib.m4])
 m4_include([../libtool.m4])
 m4_include([../ltoptions.m4])
 m4_include([../ltsugar.m4])
diff --git a/ld/configure b/ld/configure
index 3b50f5db8e..db3424fdb8 100755
--- a/ld/configure
+++ b/ld/configure
@@ -645,6 +645,8 @@ elf_plt_unwind_list_options
 elf_shlib_list_options
 elf_list_options
 STRINGIFY
+zlibinc
+zlibdir
 enable_initfini_array
 ENABLE_PLUGINS_FALSE
 ENABLE_PLUGINS_TRUE
@@ -834,6 +836,7 @@ enable_werror
 enable_build_warnings
 enable_nls
 enable_initfini_array
+with_system_zlib
 '
       ac_precious_vars='build_alias
 host_alias
@@ -1510,6 +1513,7 @@ Optional Packages:
   --with-gnu-ld           assume the C compiler uses GNU ld [default=no]
   --with-lib-path=dir1:dir2...  set default LIB_PATH
   --with-sysroot=DIR Search for usr/lib et al within DIR.
+  --with-system-zlib      use installed libz
 
 Some influential environment variables:
   CC          C compiler command
@@ -12027,7 +12031,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 12030 "configure"
+#line 12034 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -12133,7 +12137,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 12136 "configure"
+#line 12140 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -17385,6 +17389,26 @@ cat >>confdefs.h <<_ACEOF
 _ACEOF
 
 
+# Link in zlib if we can.  This allows us to read and write
+# compressed CTF sections.
+
+  # Use the system's zlib library.
+  zlibdir="-L\$(top_builddir)/../zlib"
+  zlibinc="-I\$(top_srcdir)/../zlib"
+
+# Check whether --with-system-zlib was given.
+if test "${with_system_zlib+set}" = set; then :
+  withval=$with_system_zlib; if test x$with_system_zlib = xyes ; then
+    zlibdir=
+    zlibinc=
+  fi
+
+fi
+
+
+
+
+
 # When converting linker scripts into strings for use in emulation
 # files, use astring.sed if the compiler supports ANSI string
 # concatenation, or ostring.sed otherwise.  This is to support the
diff --git a/ld/configure.ac b/ld/configure.ac
index ee62d10ac5..ab65d569c3 100644
--- a/ld/configure.ac
+++ b/ld/configure.ac
@@ -293,6 +293,10 @@ BFD_BINARY_FOPEN
 
 AC_CHECK_DECLS([strstr, free, sbrk, getenv, environ])
 
+# Link in zlib if we can.  This allows us to read and write
+# compressed CTF sections.
+AM_ZLIB
+
 # When converting linker scripts into strings for use in emulation
 # files, use astring.sed if the compiler supports ANSI string
 # concatenation, or ostring.sed otherwise.  This is to support the
diff --git a/ld/ldlang.c b/ld/ldlang.c
index 3f25b363d0..466c7f0748 100644
--- a/ld/ldlang.c
+++ b/ld/ldlang.c
@@ -39,7 +39,6 @@
 #include "fnmatch.h"
 #include "demangle.h"
 #include "hashtab.h"
-#include "elf-bfd.h"
 #ifdef ENABLE_PLUGINS
 #include "plugin.h"
 #endif /* ENABLE_PLUGINS */
@@ -127,6 +126,7 @@ bfd_boolean delete_output_file_on_failure = FALSE;
 struct lang_phdr *lang_phdr_list;
 struct lang_nocrossrefs *nocrossref_list;
 struct asneeded_minfo **asneeded_list_tail;
+static ctf_file_t *ctf_output;
 
  /* Functions that traverse the linker script and might evaluate
     DEFINED() need to increment this at the start of the traversal.  */
@@ -3575,6 +3575,230 @@ open_input_bfds (lang_statement_union_type *s, enum open_bfd_mode mode)
     einfo ("%F");
 }
 
+/* Open the CTF sections in the input files with libctf: if any were opened,
+   create the output CTF container (ctf_output) that the merged CTF data
+   will be written from later on.  */
+
+static void
+ldlang_open_ctf (void)
+{
+  int any_ctf = 0;
+  int err;
+
+  LANG_FOR_EACH_INPUT_STATEMENT (file)
+    {
+      asection *sect;
+
+      /* Incoming files from the compiler have a single ctf_file_t in them
+         (which is presented to us by the libctf API in a ctf_archive_t
+         wrapper): files derived from a previous relocatable link have a CTF
+         archive containing possibly many CTF files.  */
+
+      if ((file->the_ctf = ctf_bfdopen (file->the_bfd, &err)) == NULL)
+        {
+          if (err != ECTF_NOCTFDATA)
+            einfo (_("%P: warning: CTF section in `%pI' not loaded: "
+                     "its types will be discarded: `%s'\n"), file,
+                     ctf_errmsg (err));
+          continue;
+        }
+
+      /* Prevent the contents of this section from being written, while
+         requiring the section itself to be duplicated in the output.  */
+      /* This section must exist if ctf_bfdopen() succeeded.  */
+      sect = bfd_get_section_by_name (file->the_bfd, ".ctf");
+      sect->size = 0;
+      sect->flags |= SEC_NEVER_LOAD | SEC_HAS_CONTENTS;
+
+      any_ctf = 1;
+    }
+
+  if (!any_ctf)
+    {
+      ctf_output = NULL;
+      return;
+    }
+
+  if ((ctf_output = ctf_create (&err)) != NULL)
+    return;
+
+  einfo (_("%P: warning: CTF output not created: `%s'\n"),
+         ctf_errmsg (err));
+  /* No output container, so nothing will be merged: close the inputs.  */
+  LANG_FOR_EACH_INPUT_STATEMENT (errfile)
+    ctf_close (errfile->the_ctf);
+}
+
+/* Merge together CTF sections.  After this, only the symtab-dependent
+   function and data object sections need adjustment.  Failures only warn.  */
+
+static void
+lang_merge_ctf (void)
+{
+  if (!ctf_output)
+    return;
+
+  LANG_FOR_EACH_INPUT_STATEMENT (file)
+    {
+      /* Takes ownership of file->the_ctf unless it fails.  */
+      if (file->the_ctf != NULL
+          && ctf_link_add_ctf (ctf_output, file->the_ctf, file->filename) < 0)
+        {
+          einfo (_("%P: warning: cannot link with CTF in %pB: %s\n"),
+                 file->the_bfd, ctf_errmsg (ctf_errno (ctf_output)));
+          ctf_close (file->the_ctf);
+          file->the_ctf = NULL;
+        }
+    }
+
+  if (ctf_link (ctf_output, CTF_LINK_SHARE_UNCONFLICTED) < 0)
+    {
+      asection *output_sect;
+
+      einfo (_("%P: warning: CTF linking failed; output will have no CTF "
+               "section: %s\n"), ctf_errmsg (ctf_errno (ctf_output)));
+      output_sect = bfd_get_section_by_name (link_info.output_bfd, ".ctf");
+      if (output_sect != NULL)
+        {
+          output_sect->size = 0;
+          output_sect->flags |= SEC_EXCLUDE;
+        }
+    }
+}
+
+/* State shared by the two iterator callbacks below, which are handed to
+   libctf to pull strings and symbols out of the ELF tables.  */
+
+struct ldlang_ctf_strsym_iter_cb_arg
+{
+  struct elf_sym_strtab *syms;        /* Array indexed by next_i.  */
+  bfd_size_type symcount;             /* Symbol iteration stops past this.  */
+  struct elf_strtab_hash *symstrtab;  /* Strings and symbol names.  */
+  size_t next_i;                      /* Cursor; 0 means start afresh.  */
+  size_t next_idx;                    /* Expected dest_index (asserted).  */
+};
+
+/* Return strings from the strtab to libctf, one by one, storing each string's
+   offset in *OFFSET.  Returns NULL when iteration is complete.  */
+
+static const char *
+ldlang_ctf_strtab_iter_cb (uint32_t *offset, void *arg_)
+{
+  bfd_size_type off;
+  const char *ret;
+
+  struct ldlang_ctf_strsym_iter_cb_arg *arg =
+    (struct ldlang_ctf_strsym_iter_cb_arg *) arg_;
+
+  /* There is no zeroth string.  */
+  if (arg->next_i == 0)
+    arg->next_i = 1;
+
+  if (arg->next_i < _bfd_elf_strtab_len (arg->symstrtab))
+    {
+      ret = _bfd_elf_strtab_str (arg->symstrtab, arg->next_i++, &off);
+      *offset = off;
+
+      /* A truncated offset means the CTF format cannot encode this string
+         or any later one, so stop sharing strings here.  */
+      if (*offset == off)
+        return ret;
+    }
+
+  /* Finished (or overflowed): rewind the cursor, since the same argument
+     block is reused for the symbol iteration.  */
+  arg->next_i = 0;
+  return NULL;
+}
+
+/* Hand libctf the next symbol in DEST, or NULL (rewinding both cursors) once
+   next_i exceeds symcount.  We assume (and assert) that the symbols are in
+   strictly ascending order, with none added in between existing ones.  */
+
+static struct ctf_link_sym *
+ldlang_ctf_symbols_iter_cb (struct ctf_link_sym *dest, void *arg_)
+{
+  struct ldlang_ctf_strsym_iter_cb_arg *it = arg_;
+  struct elf_sym_strtab *cur;
+
+  if (it->next_i <= it->symcount)
+    {
+      cur = &it->syms[it->next_i];
+      ASSERT (cur->dest_index == it->next_idx);
+      dest->st_name = _bfd_elf_strtab_str (it->symstrtab, it->next_i, NULL);
+      dest->st_shndx = cur->sym.st_shndx;
+      dest->st_type = ELF_ST_TYPE (cur->sym.st_info);
+      dest->st_value = cur->sym.st_value;
+      it->next_i++;
+      return dest;
+    }
+
+  it->next_i = 0;
+  it->next_idx = 0;
+  return NULL;
+}
+
+/* Suck in the final string table for deduplication, and reshuffle the output
+   CTF file in accordance with the final symbol indexes.  We only do this on the
+   last, nonrelocatable link, since only at that time is the symbol table
+   finalized and unchanging.  Failures only cost space, so they just warn.  */
+
+void
+ldlang_ctf_apply_strsym (struct elf_sym_strtab *syms, bfd_size_type symcount,
+                         struct elf_strtab_hash *symstrtab)
+{
+  struct ldlang_ctf_strsym_iter_cb_arg args = { syms, symcount, symstrtab,
+                                                0, 0 };
+
+  if (!ctf_output)
+    return;
+
+  if (!bfd_link_relocatable (&link_info))
+    {
+      if (ctf_link_add_strtab (ctf_output, ldlang_ctf_strtab_iter_cb,
+                               &args) < 0)
+        einfo (_("%P: warning: CTF strtab association failed; strings will "
+                 "not be shared: %s\n"),
+               ctf_errmsg (ctf_errno (ctf_output)));
+
+      if (ctf_link_shuffle_syms (ctf_output, ldlang_ctf_symbols_iter_cb,
+                                 &args) < 0)
+        einfo (_("%P: warning: CTF symbol shuffling failed; slight space "
+                 "cost: %s\n"), ctf_errmsg (ctf_errno (ctf_output)));
+    }
+}
+
+/* Write out the CTF section, if the output has one; warn if that fails.  */
+
+void
+ldlang_write_ctf (void)
+{
+  size_t output_size = 0;
+  asection *output_sect;
+
+  if (!ctf_output)
+    return;
+
+  /* Compress CTF output above an arbitrary 4096 bytes (TODO: configurable?).
+     TODO: the contents are never freed, like much BFD link-time data.  */
+  output_sect = bfd_get_section_by_name (link_info.output_bfd, ".ctf");
+  if (output_sect != NULL)
+    {
+      output_sect->contents = ctf_link_write (ctf_output, &output_size, 4096);
+      output_sect->size = output_size;
+      if (!output_sect->contents)
+        {
+          einfo (_("%P: warning: CTF section emission failed; output will "
+                   "have no CTF section: %s\n"),
+                 ctf_errmsg (ctf_errno (ctf_output)));
+          output_sect->size = 0;
+          output_sect->flags |= SEC_EXCLUDE;
+        }
+    }
+  ctf_file_close (ctf_output);
+  /* TODO: what about the input files' CTF sections?  Should we free them?  */
+}
+
 /* Add the supplied name to the symbol table as an undefined reference.
    This is a two step process as the symbol table doesn't even exist at
    the time the ld command line is processed.  First we put the name
@@ -7503,6 +7727,8 @@ lang_process (void)
   if (config.map_file != NULL)
     lang_print_asneeded ();
 
+  ldlang_open_ctf ();
+
   bfd_section_already_linked_table_free ();
 
   /* Make sure that we're not mixing architectures.  We call this
@@ -7577,6 +7803,10 @@ lang_process (void)
  }
     }
 
+  /* Merge together CTF sections.  After this, only the symtab-dependent
+     function and data object sections need adjustment.  */
+  lang_merge_ctf ();
+
   /* Copy forward lma regions for output sections in same lma region.  */
   lang_propagate_lma_regions ();
 
diff --git a/ld/ldlang.h b/ld/ldlang.h
index 4e96a20c15..00170f2c9a 100644
--- a/ld/ldlang.h
+++ b/ld/ldlang.h
@@ -21,6 +21,13 @@
 #ifndef LDLANG_H
 #define LDLANG_H
 
+#include "elf-bfd.h"
+#include "ctf-api.h"
+
+/* This is defined in elf-bfd.h and far too invasively named to be allowed into
+   ld.  */
+#undef NAME
+
 #define DEFAULT_MEMORY_REGION   "*default*"
 
 typedef enum
@@ -306,6 +313,8 @@ typedef struct lang_input_statement_struct
 
   bfd *the_bfd;
 
+  ctf_archive_t *the_ctf;
+
   struct flag_info *section_flag_list;
 
   /* Next pointer for file_chain statement list.  */
@@ -688,6 +697,10 @@ extern void add_excluded_libs (const char *);
 extern bfd_boolean load_symbols
   (lang_input_statement_type *, lang_statement_list_type *);
 
+extern void ldlang_ctf_apply_strsym
+  (struct elf_sym_strtab *, bfd_size_type, struct elf_strtab_hash *);
+extern void ldlang_write_ctf
+  (void);
 extern bfd_boolean
 ldlang_override_segment_assignment
   (struct bfd_link_info *, bfd *, asection *, asection *, bfd_boolean);
diff --git a/ld/ldmain.c b/ld/ldmain.c
index a7ca4f487d..e62f92fbb1 100644
--- a/ld/ldmain.c
+++ b/ld/ldmain.c
@@ -148,7 +148,9 @@ static struct bfd_link_callbacks link_callbacks =
   einfo,
   info_msg,
   minfo,
-  ldlang_override_segment_assignment
+  ldlang_override_segment_assignment,
+  ldlang_ctf_apply_strsym,
+  ldlang_write_ctf
 };
 
 static bfd_assert_handler_type default_bfd_assert_handler;
diff --git a/ld/scripttempl/DWARF.sc b/ld/scripttempl/DWARF.sc
index 9c9cb649ef..bd86cadd47 100644
--- a/ld/scripttempl/DWARF.sc
+++ b/ld/scripttempl/DWARF.sc
@@ -43,4 +43,7 @@ cat <<EOF
   /* DWARF Extension.  */
   .debug_macro    0 : { *(.debug_macro) }
   .debug_addr     0 : { *(.debug_addr) }
+
+  /* CTF.  */
+  .ctf            : { KEEP(*(.ctf)) }
 EOF
diff --git a/ld/testsuite/ld-bootstrap/bootstrap.exp b/ld/testsuite/ld-bootstrap/bootstrap.exp
index ee9442da16..a303c05e10 100644
--- a/ld/testsuite/ld-bootstrap/bootstrap.exp
+++ b/ld/testsuite/ld-bootstrap/bootstrap.exp
@@ -160,7 +160,7 @@ foreach flags $test_flags {
  setup_xfail "mips*-*-irix5*"
     }
 
-    if ![ld_link $CC tmpdir/ld1 "$flags tmpdir/ld-partial.o $BFDLIB $LIBIBERTY $extralibs"] {
+    if ![ld_link $CC tmpdir/ld1 "$flags tmpdir/ld-partial.o $LIBCTF $BFDLIB $LIBIBERTY $extralibs"] {
  fail $testname
  continue
     }
@@ -177,13 +177,13 @@ foreach flags $test_flags {
     }
 
     regsub /tmpdir/ld/ $gcc_B_opt_save /tmpdir/gccld1/ gcc_B_opt
-    if ![ld_link $CC tmpdir/ld2 "$flags $OFILES $BFDLIB $LIBIBERTY $extralibs"] {
+    if ![ld_link $CC tmpdir/ld2 "$flags $OFILES $LIBCTF $BFDLIB $LIBIBERTY $extralibs"] {
  fail $testname
  continue
     }
 
     regsub /tmpdir/ld/ $gcc_B_opt_save /tmpdir/gccld2/ gcc_B_opt
-    if ![ld_link $CC tmpdir/ld3 "$flags $OFILES $BFDLIB $LIBIBERTY $extralibs"] {
+    if ![ld_link $CC tmpdir/ld3 "$flags $OFILES $LIBCTF $BFDLIB $LIBIBERTY $extralibs"] {
  fail $testname
  continue
     }
@@ -196,7 +196,7 @@ foreach flags $test_flags {
  # generated by different linkers, tmpdir/ld1 and tmpdir/ld2.
  # So we rebuild tmpdir/ld2 with tmpdir/ld3.
  regsub /tmpdir/ld/ $gcc_B_opt_save /tmpdir/gccld3/ gcc_B_opt
- if ![ld_link $CC tmpdir/ld2 "$flags $OFILES $BFDLIB $LIBIBERTY $extralibs"] {
+ if ![ld_link $CC tmpdir/ld2 "$flags $OFILES $LIBCTF $BFDLIB $LIBIBERTY $extralibs"] {
     fail $testname
     continue
  }
--
2.22.0.238.g049a27acdc

12