[PATCH] translate.cxx: Make stap-symbols.h a separate CU.

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[PATCH] translate.cxx: Make stap-symbols.h a separate CU.

Yichun Zhang (agentzh)-2
This increases the kernel module compilation parallelism. Pass 4
is now about 700ms faster when --ldd is used for a simple .stp script
file using print_ubacktrace() on an Intel Core i9-9900K machine.
---
 buildrun.cxx          |  4 ++-
 runtime/sym.h         | 14 +++++-----
 translate.cxx         | 62 ++++++++++++++++++++++++++++---------------
 translator-output.cxx | 26 ++++++++++++++++--
 translator-output.h   |  5 ++++
 5 files changed, 80 insertions(+), 31 deletions(-)

diff --git a/buildrun.cxx b/buildrun.cxx
index 14043adbb..629a6d5d0 100644
--- a/buildrun.cxx
+++ b/buildrun.cxx
@@ -607,7 +607,9 @@ compile_pass (systemtap_session& s)
       objname[objname.size()-1] = 'o'; // now objname
       o << " " + objname;
     }
-  o << endl;
+  o << " stap_symbols.o" << endl;
+
+  o << s.tmpdir << "/stap_symbols.o: $(STAPCONF_HEADER)" << endl;
 
   // add all stapconf dependencies
   string translated = s.translated_source;
diff --git a/runtime/sym.h b/runtime/sym.h
index 990500083..92075814f 100644
--- a/runtime/sym.h
+++ b/runtime/sym.h
@@ -133,13 +133,13 @@ struct _stp_module {
 };
 
 /* Defined by translator-generated stap-symbols.h. */
-static struct _stp_module *_stp_modules [];
-static const unsigned _stp_num_modules;
+extern struct _stp_module *_stp_modules [];
+extern const unsigned _stp_num_modules;
 
 /* Used in the unwinder to special case unwinding through kretprobes. */
 /* Initialized through translator (stap-symbols.h) relative to kernel */
 /* load address, fixup by transport symbols _stp_do_relocation */
-static unsigned long _stp_kretprobe_trampoline;
+extern unsigned long _stp_kretprobe_trampoline;
 
 static unsigned long _stp_kmodule_relocate (const char *module,
     const char *section,
@@ -154,10 +154,10 @@ static void _stp_kmodule_update_address(const char* module,
 
 #if (defined(STP_USE_DWARF_UNWINDER) && defined(STP_NEED_UNWIND_DATA)) \
     || defined(STP_NEED_LINE_DATA)
-static struct _stp_module _stp_module_self;
-static struct _stp_section _stp_module_self_sections[];
-static struct _stp_symbol _stp_module_self_symbols_0[];
-static struct _stp_symbol _stp_module_self_symbols_1[];
+extern struct _stp_module _stp_module_self;
+extern struct _stp_section _stp_module_self_sections[];
+extern struct _stp_symbol _stp_module_self_symbols_0[];
+extern struct _stp_symbol _stp_module_self_symbols_1[];
 #endif /* defined(STP_USE_DWARF_UNWINDER) && defined(STP_NEED_UNWIND_DATA)
           || defined(STP_NEED_LINE_DATA) */
 #endif /* _STP_SYM_H_ */
diff --git a/translate.cxx b/translate.cxx
index f5a73eca9..cabcad55a 100644
--- a/translate.cxx
+++ b/translate.cxx
@@ -7610,12 +7610,15 @@ prepare_symbol_data (systemtap_session& s)
 void
 emit_symbol_data (systemtap_session& s)
 {
-  string symfile = "stap-symbols.h";
-
-  s.op->newline() << "#include " << lex_cast_qstring (symfile);
+  string symfile = "stap_symbols.c";
 
   ofstream kallsyms_out ((s.tmpdir + "/" + symfile).c_str());
 
+  kallsyms_out << "#include <linux/module.h>\n"
+    "#include <linux/kernel.h>\n"
+    "#include <sym.h>\n"
+    "#include \"stap_common.h\"\n";
+
   vector<pair<string,unsigned> > seclist;
   map<unsigned, addrmap_t> addrmap;
   unwindsym_dump_context ctx = { s, kallsyms_out,
@@ -7712,13 +7715,13 @@ void
 self_unwind_declarations(unwindsym_dump_context *ctx)
 {
   ctx->output << "static uint8_t _stp_module_self_eh_frame [] = {0,};\n";
-  ctx->output << "static struct _stp_symbol _stp_module_self_symbols_0[] = {{0},};\n";
-  ctx->output << "static struct _stp_symbol _stp_module_self_symbols_1[] = {{0},};\n";
-  ctx->output << "static struct _stp_section _stp_module_self_sections[] = {\n";
+  ctx->output << "struct _stp_symbol _stp_module_self_symbols_0[] = {{0},};\n";
+  ctx->output << "struct _stp_symbol _stp_module_self_symbols_1[] = {{0},};\n";
+  ctx->output << "struct _stp_section _stp_module_self_sections[] = {\n";
   ctx->output << "{.name = \".symtab\", .symbols = _stp_module_self_symbols_0, .num_symbols = 0},\n";
   ctx->output << "{.name = \".text\", .symbols = _stp_module_self_symbols_1, .num_symbols = 0},\n";
   ctx->output << "};\n";
-  ctx->output << "static struct _stp_module _stp_module_self = {\n";
+  ctx->output << "struct _stp_module _stp_module_self = {\n";
   ctx->output << ".name = \"stap_self_tmp_value\",\n";
   ctx->output << ".path = \"stap_self_tmp_value\",\n";
   ctx->output << ".num_sections = 2,\n";
@@ -7751,16 +7754,16 @@ emit_symbol_data_done (unwindsym_dump_context *ctx, systemtap_session& s)
   // Print out a definition of the runtime's _stp_modules[] globals.
   ctx->output << "\n";
   self_unwind_declarations(ctx);
-   ctx->output << "static struct _stp_module *_stp_modules [] = {\n";
+   ctx->output << "struct _stp_module *_stp_modules [] = {\n";
   for (unsigned i=0; i<ctx->stp_module_index; i++)
     {
       ctx->output << "& _stp_module_" << i << ",\n";
     }
   ctx->output << "& _stp_module_self,\n";
   ctx->output << "};\n";
-  ctx->output << "static const unsigned _stp_num_modules = ARRAY_SIZE(_stp_modules);\n";
+  ctx->output << "const unsigned _stp_num_modules = ARRAY_SIZE(_stp_modules);\n";
 
-  ctx->output << "static unsigned long _stp_kretprobe_trampoline = ";
+  ctx->output << "unsigned long _stp_kretprobe_trampoline = ";
   // Special case for -1, which is invalid in hex if host width > target width.
   if (ctx->stp_kretprobe_trampoline_addr == (unsigned long) -1)
     ctx->output << "-1;\n";
@@ -7860,8 +7863,11 @@ int
 translate_pass (systemtap_session& s)
 {
   int rc = 0;
+  string comm_hdr_file = s.tmpdir + "/stap_common.h";
 
   s.op = new translator_output (s.translated_source);
+  s.op->new_common_header (comm_hdr_file);
+
   // additional outputs might be found in s.auxiliary_outputs
   c_unparser cup (& s);
   s.up = & cup;
@@ -7929,31 +7935,45 @@ translate_pass (systemtap_session& s)
       << "__attribute__ ((section (\"" << STAP_PRIVILEGE_SECTION <<"\")))"
       << " = STP_PRIVILEGE;";
 
-      s.op->newline() << "#ifndef MAXNESTING";
-      s.op->newline() << "#define MAXNESTING " << nesting;
-      s.op->newline() << "#endif";
+      s.op->newline() << "#include \"stap_common.h\"";
+
+      s.op->hdr->newline() << "#ifndef MAXNESTING";
+      s.op->hdr->newline() << "#define MAXNESTING " << nesting;
+      s.op->hdr->newline() << "#endif";
 
       // Generated macros specifying how much storage is required for
       // regexp subexpressions. (TODOXXX Skip when there are no DFAs?)
-      s.op->newline() << "#define STAPREGEX_MAX_MAP " << s.dfa_maxmap;
-      s.op->newline() << "#define STAPREGEX_MAX_TAG " << s.dfa_maxtag;
+      s.op->hdr->newline() << "#define STAPREGEX_MAX_MAP " << s.dfa_maxmap;
+      s.op->hdr->newline() << "#define STAPREGEX_MAX_TAG " << s.dfa_maxtag;
 
-      s.op->newline() << "#define STP_SKIP_BADVARS " << (s.skip_badvars ? 1 : 0);
+      s.op->hdr->newline() << "#define STP_SKIP_BADVARS " << (s.skip_badvars ? 1 : 0);
 
       if (s.bulk_mode)
-  s.op->newline() << "#define STP_BULKMODE";
+  s.op->hdr->newline() << "#define STP_BULKMODE";
 
       if (s.timing || s.monitor)
- s.op->newline() << "#define STP_TIMING";
+ s.op->hdr->newline() << "#define STP_TIMING";
 
       if (s.need_unwind)
- s.op->newline() << "#define STP_NEED_UNWIND_DATA 1";
+ s.op->hdr->newline() << "#define STP_NEED_UNWIND_DATA 1";
 
       if (s.need_lines)
-        s.op->newline() << "#define STP_NEED_LINE_DATA 1";
+        s.op->hdr->newline() << "#define STP_NEED_LINE_DATA 1";
 
       // Emit the total number of probes (not regarding merged probe handlers)
-      s.op->newline() << "#define STP_PROBE_COUNT " << s.probes.size();
+      s.op->hdr->newline() << "#define STP_PROBE_COUNT " << s.probes.size();
+
+      s.op->hdr->newline() << "#if (defined(__arm__) || defined(__i386__) "
+        "|| defined(__x86_64__) || defined(__powerpc64__)) "
+        "|| defined (__s390x__) || defined(__aarch64__) || defined(__mips__)\n"
+        "#ifdef STP_NEED_UNWIND_DATA\n"
+        "#ifndef STP_USE_DWARF_UNWINDER\n"
+        "#define STP_USE_DWARF_UNWINDER\n"
+        "#endif\n"
+        "#endif\n"
+        "#endif";
+
+      s.op->hdr->close ();
 
       // Emit systemtap_module_refresh() prototype so we can reference it
       s.op->newline() << "static void systemtap_module_refresh (const char* modname);";
diff --git a/translator-output.cxx b/translator-output.cxx
index a28d6f8d1..370fe8124 100644
--- a/translator-output.cxx
+++ b/translator-output.cxx
@@ -15,7 +15,7 @@
 using namespace std;
 
 translator_output::translator_output (ostream& f):
-  buf(0), o2 (0), o (f), tablevel (0), trailer_p(false)
+  buf(0), o2 (0), o (f), tablevel (0), trailer_p(false), hdr (NULL)
 {
 }
 
@@ -26,11 +26,33 @@ translator_output::translator_output (const string& filename, size_t bufsize):
   o (*o2),
   tablevel (0),
   filename (filename),
-  trailer_p (false)
+  trailer_p (false),
+  hdr (NULL)
 {
   o2->rdbuf()->pubsetbuf(buf, bufsize);
 }
 
+
+void
+translator_output::new_common_header (ostream& f)
+{ // Point hdr at a fresh translator_output that writes to the stream f.
+  if (hdr) // a header output is already attached: free it before replacing
+    delete hdr;
+
+  hdr = new translator_output (f);
+}
+
+
+void
+translator_output::new_common_header (const string& filename, size_t bufsize)
+{ // Point hdr at a fresh translator_output built from filename and bufsize.
+  if (hdr) // a header output is already attached: free it before replacing
+    delete hdr;
+
+  hdr = new translator_output (filename, bufsize);
+}
+
+
 void
 translator_output::close()
 {
diff --git a/translator-output.h b/translator-output.h
index f7a108642..e3d815299 100644
--- a/translator-output.h
+++ b/translator-output.h
@@ -27,10 +27,15 @@ public:
   std::string filename;
   bool trailer_p; // is this file to be linked before or after main generated source file
 
+  translator_output* hdr;  /* for stap_common.h file */
+
   translator_output (std::ostream& file);
   translator_output (const std::string& filename, size_t bufsize = 8192);
   ~translator_output ();
 
+  void new_common_header (std::ostream& file);
+  void new_common_header (const std::string& filename, size_t bufsize = 8192);
+
   void close ();
   
   std::ostream& newline (int indent = 0);
--
2.17.2