TCL Talks

Check-in [4484e7c8f6]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add cg_anno.tcl to the repository. Other cleanup of the slides.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 4484e7c8f626ea3dffa299c54d0ef8507157be44
User & Date: drh 2016-06-10 01:24:22.810
Context
2016-06-10
10:38
Move the eurotcl2016 talk into its own subdirectory. check-in: e9edc2eb0d user: drh tags: trunk
01:24
Add cg_anno.tcl to the repository. Other cleanup of the slides. check-in: 4484e7c8f6 user: drh tags: trunk
00:28
Add a link to the slides on the title page of the EuroTCL 2016 talk. check-in: 060e434582 user: drh tags: trunk
Changes
Unified Diff Ignore Whitespace Patch
Added cg_anno.txt.
















































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#!/usr/bin/tclsh
#
# A wrapper around cg_annotate that sets appropriate command-line options
# and rearranges the output so that annotated files occur in a consistent
# sorted order.
#
# Usage:  cg_anno.tcl CACHEGRIND-OUTPUT-FILE ...
#
# All command-line arguments are passed through to cg_annotate.  Each line
# of cg_annotate output is appended to a bucket in the out() array:
#
#   *  Lines following a "-- Auto-annotated source: NAME" header go into
#      the bucket keyed by NAME.
#   *  Everything else (the leading summary and whatever follows a
#      "The following files chosen for ..." line) goes into bucket "!".
#
# At the end the buckets are printed in [lsort] order of their keys.
#

set in [open "|cg_annotate --show=Ir --auto=yes --context=40 $argv" r]
set dest !
set out(!) {}

# Use the return value of [gets] to detect end of input.  Testing [eof]
# before reading makes one extra pass with an empty line once the input
# is exhausted, which appends a spurious blank line to the last bucket.
while {[gets $in line] >= 0} {
  # Expand each tab into eight spaces.
  set line [string map {\t {        }} $line]
  if {[regexp {^-- Auto-annotated source: (.*)} $line all name]} {
    set dest $name
  } elseif {[regexp {^-- line \d+ ------} $line]} {
    # Replace the line number (third word) with "#".  Presumably this keeps
    # diffs between two runs quiet when source line numbers shift.
    set line [lreplace $line 2 2 {#}]
  } elseif {[regexp {^The following files chosen for } $line]} {
    set dest !
  }
  append out($dest) $line\n
}
foreach x [lsort [array names out]] {
  puts $out($x)
}

# Closing a command pipeline is the point at which Tcl reports a failure
# of the child process.  Without this, a cg_annotate error (for example a
# missing input file) would go unnoticed.  Forward the message, and
# propagate the exit status when the child exited abnormally.
if {[catch {close $in} msg]} {
  puts stderr $msg
  if {[lindex $::errorCode 0] eq "CHILDSTATUS"} {
    exit [lindex $::errorCode 2]
  }
}
Changes to eurotcl2016-microopt.html.
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
<h3>Amalgamation vs. Separate Files</h3>
<table>
<thead>
<tr><th>Build<th>CPU Cycles<th>Performance
</thead>
<tbody>
<tr><td>Separate -Os<td>1532.966911<td>baseline
<tr><td>Separate -O6<td>1522.18922<td>-0.7%
<tr><td>Amalgamation -Os<td>1459.618198<td>-4.8%
<tr><td>Amalgamation -O6<td>1430.548506<td>-6.7%
</tbody>
</table>
</section>

<section>







|







63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
<h3>Amalgamation vs. Separate Files</h3>
<table>
<thead>
<tr><th>Build<th>CPU Cycles<th>Performance
</thead>
<tbody>
<tr><td>Separate -Os<td>1532.966911<td>baseline
<tr><td>Separate -O6<td>1522.189222<td>-0.7%
<tr><td>Amalgamation -Os<td>1459.618198<td>-4.8%
<tr><td>Amalgamation -O6<td>1430.548506<td>-6.7%
</tbody>
</table>
</section>

<section>
121
122
123
124
125
126
127

128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169


170
171
172
173
174
175
176
<li> Make code changes
<li> Test, test, test....
<li> Goto 1
</ol>
</section>

<section>

<h2>Running The Workload</h2>
<pre>

  rm -f cachegrind.out.*
  make clean tclsh
  valgrind --tool=cachegrind ./tclsh workload.tcl
  cg_anno.tcl cachegrind.out.* >cout-current.txt
  fossil test-diff --tk cout-baseline.txt cout-current.txt &

</pre>
</section>

<section>
<h2><a href='http://www.sqlite.org/tmp/cg_anno.tcl'>cg_anno.tcl</a></h2>
<pre><code>
  #!/usr/bin/tclsh
  #
  # A wrapper around cg_annotate that sets appropriate command-line options
  # and rearranges the output so that annotated files occur in a consistent
  # sorted order.
  #
  set in [open "|cg_annotate --show=Ir --auto=yes --context=40 $argv" r]
  set dest !
  set out(!) {}
  while {![eof $in]} {
    set line [string map {\t {        }} [gets $in]]
    if {[regexp {^-- Auto-annotated source: (.*)} $line all name]} {
      set dest $name
    } elseif {[regexp {^-- line \d+ ------} $line]} {
      set line [lreplace $line 2 2 {#}]
    } elseif {[regexp {^The following files chosen for } $line]} {
      set dest !
    }
    append out($dest) $line\n
  }
  foreach x [lsort [array names out]] {
    puts $out($x)
  }
  
</code></pre>
</section>


<section>
<p>
<a href='https://www.sqlite.org/src/info/618d8dd4ff4' target='_blank'>
https://www.sqlite.org/src/info/618d8dd4ff4</a></p>
<pre>

 514,274  const int bMmapOk = (pgno<font color='red'>!=</font>1 && USEFETCH(pPager)







>
|
|







|
|

|
|
|
|
<
<
<
<
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<
|
|
>
>







121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144





145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162

163
164
165
166
167
168
169
170
171
172
173
<li> Make code changes
<li> Test, test, test....
<li> Goto 1
</ol>
</section>

<section>
  <section>
  <h2>Running The Workload</h2>
  <pre>

  rm -f cachegrind.out.*
  make clean tclsh
  valgrind --tool=cachegrind ./tclsh workload.tcl
  cg_anno.tcl cachegrind.out.* >cout-current.txt
  fossil test-diff --tk cout-baseline.txt cout-current.txt &

  </pre>
  </section>

  <section>
  <h2><a href='./cg_anno.txt' target='_blank'>cg_anno.tcl</a></h2>
  <pre>
 #!/usr/bin/tclsh





 set in [open "|cg_annotate --show=Ir --auto=yes $argv" r]
 set dest !
 set out(!) {}
 while {![eof $in]} {
   set line [string map {\t {        }} [gets $in]]
   if {[regexp {^-- Auto-annotated source: (.*)} $line all nm]} {
     set dest $nm
   } elseif {[regexp {^-- line \d+ ------} $line]} {
     set line [lreplace $line 2 2 {#}]
   } elseif {[regexp {^The following files chosen for } $line]} {
     set dest !
   }
   append out($dest) $line\n
 }
 foreach x [lsort [array names out]] {
   puts $out($x)
 }

  </pre>
  </section>
</section>

<section>
<p>
<a href='https://www.sqlite.org/src/info/618d8dd4ff4' target='_blank'>
https://www.sqlite.org/src/info/618d8dd4ff4</a></p>
<pre>

 514,274  const int bMmapOk = (pgno<font color='red'>!=</font>1 && USEFETCH(pPager)
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
</pre>
</section>

<section>
<h2>Cachegrind Output</h2>
<pre>

--------------------------------------------------------------
         Ir 
--------------------------------------------------------------
262,402,580  PROGRAM TOTALS

--------------------------------------------------------------
        Ir  file:function
--------------------------------------------------------------
49,838,612  /tmp/generic/tclExecute.c:TEBCresume
32,018,100  /tmp/generic/tclNamesp.c:TclGetNamespaceForQualNam
26,221,282  /tmp/generic/tclVar.c:TclObjLookupVarEx
24,875,355  /tmp/generic/tclVar.c:ObjFindNamespaceVar
24,844,452  /tmp/generic/tclHash.c:CreateHashEntry
21,851,978  /tmp/generic/tclVar.c:TclLookupSimpleVar
 8,220,562  /tmp/generic/tclObj.c:TclHashObjKey
 7,623,937  /tmp/generic/tclVar.c:TclPtrSetVar
<font color='red'> 6,500,844  /tmp/generic/tclVar.c:CompareVarKeys</font>
 6,169,552  pthread_getspecific.c:pthread_getspecific
 5,403,119  /tmp/generic/tclUtil.c:Tcl_DStringFree
 4,369,586  /tmp/generic/tclVar.c:FreeParsedVarName
 3,741,590  /tmp/generic/tclThreadAlloc.c:TclpAlloc
 3,682,721  /tmp/generic/tclEncoding.c:UtfToUtfProc.isra.0

</pre>
</section>

<section>
  <section>
  <pre>
  
          .  static int
          .  CompareVarKeys(
          .      void *keyPtr, 
          .      Tcl_HashEntry *hPtr)
  2,016,738  {
          .      Tcl_Obj *objPtr1 = keyPtr;
    336,123      Tcl_Obj *objPtr2 = hPtr->key.objPtr;
          .      register const char *p1, *p2;
          .      register int l1, l2;







<
<
<
|
<
|
<
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







|
<







214
215
216
217
218
219
220



221

222


223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245

246
247
248
249
250
251
252
</pre>
</section>

<section>
<h2>Cachegrind Output</h2>
<pre>




 262,402,580  PROGRAM TOTALS

 --------------------------------------------------------------


  49,838,612  /tmp/generic/tclExecute.c:TEBCresume
  32,018,100  /tmp/generic/tclNamesp.c:TclGetNamespaceForQualNam
  26,221,282  /tmp/generic/tclVar.c:TclObjLookupVarEx
  24,875,355  /tmp/generic/tclVar.c:ObjFindNamespaceVar
  24,844,452  /tmp/generic/tclHash.c:CreateHashEntry
  21,851,978  /tmp/generic/tclVar.c:TclLookupSimpleVar
   8,220,562  /tmp/generic/tclObj.c:TclHashObjKey
   7,623,937  /tmp/generic/tclVar.c:TclPtrSetVar
  <font color='red'> 6,500,844  /tmp/generic/tclVar.c:CompareVarKeys</font>
   6,169,552  pthread_getspecific.c:pthread_getspecific
   5,403,119  /tmp/generic/tclUtil.c:Tcl_DStringFree
   4,369,586  /tmp/generic/tclVar.c:FreeParsedVarName
   3,741,590  /tmp/generic/tclThreadAlloc.c:TclpAlloc
   3,682,721  /tmp/generic/tclEncoding.c:UtfToUtfProc.isra.0
  
</pre>
</section>

<section>
  <section>
  <pre>
  
          .  static int CompareVarKeys(

          .      void *keyPtr, 
          .      Tcl_HashEntry *hPtr)
  2,016,738  {
          .      Tcl_Obj *objPtr1 = keyPtr;
    336,123      Tcl_Obj *objPtr2 = hPtr->key.objPtr;
          .      register const char *p1, *p2;
          .      register int l1, l2;
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290

291
292
293
294
295
296
297
298
299
300
301

  <section>
  <h3>CPU Registers</h3>
  <p><img src='registers.jpg' class='plain'></img></p>
  </section>

  <section>
  <h3>Registers Must Be Saved If...</h3>
  <ul>
  <li> Complex function &mdash; too many registers needed
  <li> Subroutines are called, except tail recursion
  </ul>
  </section>
</section>

<section>

<pre>

        .  static int
        .  CompareVarKeys(
        .      void *keyPtr,
        .      Tcl_HashEntry *hPtr)
        .  {
        .      Tcl_Obj *objPtr1 = keyPtr;
  336,123      Tcl_Obj *objPtr2 = hPtr->key.objPtr;
  672,246      if (objPtr1 == objPtr2) {
        .          return 1;







|

|
|





>
|

|
<







265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284

285
286
287
288
289
290
291

  <section>
  <h3>CPU Registers</h3>
  <p><img src='registers.jpg' class='plain'></img></p>
  </section>

  <section>
  <h3>Registers must push to stack if...</h3>
  <ul>
  <li> Many registers needed &mdash; complex function
  <li> Subroutines called, except tail recursion
  </ul>
  </section>
</section>

<section>
  <section>
  <pre>

        .  static int CompareVarKeys(

        .      void *keyPtr,
        .      Tcl_HashEntry *hPtr)
        .  {
        .      Tcl_Obj *objPtr1 = keyPtr;
  336,123      Tcl_Obj *objPtr2 = hPtr->key.objPtr;
  672,246      if (objPtr1 == objPtr2) {
        .          return 1;
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335

336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
   90,126      const char *p1 = TclGetString(objPtr1);
   30,042      int l1 = objPtr1->length;
   90,126      const char *p2 = TclGetString(objPtr2);
        .      int l2 = objPtr2->length;
  270,378      return ((l1 == l2) && !memcmp(p1, p2, l1));
  120,168  }

</pre>
</section>

<section>
<h2>TCL_NOINLINE</h2>
<pre>

  #if defined(__GNUC__)
  #  define TCL_NOINLINE  __attribute__((noinline))
  #elif defined(_MSC_VER) && _MSC_VER>=1310
  #  define TCL_NOINLINE  __declspec(noinline)
  #else
  #  define TCL_NOINLINE
  #endif

</pre>
<p>Currently on branch
<a href='https://core.tcl.tk/tcl/timeline?r=mig-opt2' target='_blank'>mig-opt2</a>.
Not yet on trunk</p>

</section>

<section>
<h2>End Result</h2>
<ul>
<li> Saved about 4M cycles out of 262M &rarr; 1.5% faster!
<li> This change is not checked in because Miguel found a better
     way to fix the frequent calls to CompareVarKeys()
</ul>
</section>

<section>
<pre>

  -----------------------------------------------------
             Ir 
  -----------------------------------------------------
  2,021,099,349  PROGRAM TOTALS
  
  -----------------------------------------------------
           Ir  file:function
  -----------------------------------------------------
  390,649,348  /tmp/generic/tclExecute.c:TEBCresume
  <font color='red'>150,647,394  ???:pthread_getspecific</font>
  123,209,668  /tmp/generic/tclHash.c:CreateHashEntry
  118,133,666  /tmp/generic/tclObj.c:TclHashObjKey
   74,396,324  /tmp/generic/tclVar.c:UnsetVarStruct
   73,162,765  /tmp/generic/tclThreadAlloc.c:TclpAlloc







|
|

|
|
|









|
|
|
|
>














<
<
<

<
<
<







300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340



341



342
343
344
345
346
347
348
   90,126      const char *p1 = TclGetString(objPtr1);
   30,042      int l1 = objPtr1->length;
   90,126      const char *p2 = TclGetString(objPtr2);
        .      int l2 = objPtr2->length;
  270,378      return ((l1 == l2) && !memcmp(p1, p2, l1));
  120,168  }

  </pre>
  </section>

  <section>
  <h2>TCL_NOINLINE</h2>
  <pre>

  #if defined(__GNUC__)
  #  define TCL_NOINLINE  __attribute__((noinline))
  #elif defined(_MSC_VER) && _MSC_VER>=1310
  #  define TCL_NOINLINE  __declspec(noinline)
  #else
  #  define TCL_NOINLINE
  #endif

  </pre>
  <p>Currently on branch
  <a href='https://core.tcl.tk/tcl/timeline?r=mig-opt2' target='_blank'>mig-opt2</a>.
  Not yet on trunk</p>
  </section>
</section>

<section>
<h2>End Result</h2>
<ul>
<li> Saved about 4M cycles out of 262M &rarr; 1.5% faster!
<li> This change is not checked in because Miguel found a better
     way to fix the frequent calls to CompareVarKeys()
</ul>
</section>

<section>
<pre>




  2,021,099,349  PROGRAM TOTALS



  -----------------------------------------------------
  390,649,348  /tmp/generic/tclExecute.c:TEBCresume
  <font color='red'>150,647,394  ???:pthread_getspecific</font>
  123,209,668  /tmp/generic/tclHash.c:CreateHashEntry
  118,133,666  /tmp/generic/tclObj.c:TclHashObjKey
   74,396,324  /tmp/generic/tclVar.c:UnsetVarStruct
   73,162,765  /tmp/generic/tclThreadAlloc.c:TclpAlloc
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
11,578,728     return pthread_getspecific(key);
         2  }

</pre>
</section>

<section>
<h2><a href='https://core.tcl.tk/tcl/info/fdbf64dc501ff0a0' target='_blank'>
Check-in [fdbf64dc50]</a></h2>
<pre>

         .  void *
         .  TclpGetAllocCache(void)
         .  {
11,578,728      return pthread_getspecific(key);
         .  }







|
|







368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
11,578,728     return pthread_getspecific(key);
         2  }

</pre>
</section>

<section>
<h3><a href='https://core.tcl.tk/tcl/info/fdbf64dc501ff0a0' target='_blank'>
Check-in [fdbf64dc50]</a></h3>
<pre>

         .  void *
         .  TclpGetAllocCache(void)
         .  {
11,578,728      return pthread_getspecific(key);
         .  }