Example reports
Below is some example output after a short session profiling on my machine.
Note that the reports have been abridged for readability; links to the full
output are provided.
System-wide binary image summary
$ opreport --exclude-dependent
CPU: PIII, speed 863.195 MHz (estimated)
Counted CPU_CLK_UNHALTED events (clocks processor is not halted) with a unit mask of 0x00 (No unit mask) count 50000
450385 75.6634 cc1plus
60213 10.1156 lyx
29313 4.9245 XFree86
11633 1.9543 as
10204 1.7142 oprofiled
7289 1.2245 vmlinux
7066 1.1871 bash
6417 1.0780 oprofile
6397 1.0747 vim
3027 0.5085 wineserver
1165 0.1957 kdeinit
832 0.1398 wine
...
Full output (1Kb)
System-wide binary image summary including per-application libraries
$ opreport
CPU: PIII, speed 863.195 MHz (estimated)
Counted CPU_CLK_UNHALTED events (clocks processor is not halted) with a unit mask of 0x00 (No unit mask) count 50000
506605 54.0125 cc1plus
450385 88.9026 cc1plus
28201 5.5667 libc-2.3.2.so
27194 5.3679 vmlinux
677 0.1336 uhci_hcd
89 0.0176 usbcore
51 0.0101 ld-2.3.2.so
...
163209 17.4008 lyx
60213 36.8932 lyx
23881 14.6322 libc-2.3.2.so
21968 13.4600 libstdc++.so.5.0.1
13676 8.3794 libpthread-0.10.so
12988 7.9579 libfreetype.so.6.3.1
10375 6.3569 vmlinux
8599 5.2687 libqt-mt.so.3.0.5
4190 2.5673 ld-2.3.2.so
4157 2.5470 libfontconfig.so.1.0
...
58962 6.2863 kdeinit
16971 28.7829 libqt-mt.so.3.0.5
11533 19.5601 libkonsolepart.so
...
...
Full output (13Kb)
System-wide symbol summary including per-application libraries
$ opreport --symbols --image-path=/lib/modules/2.5.75/kernel/
CPU: PIII, speed 863.195 MHz (estimated)
Counted CPU_CLK_UNHALTED events (clocks processor is not halted) with a unit mask of 0x00 (No unit mask) count 50000
vma samples % image name app name symbol name
00000000 450385 48.1828 cc1plus cc1plus (no symbols)
00000000 11617 1.2428 as as (no symbols)
4207d260 7206 0.7709 libc-2.3.2.so cc1plus memset
00000000 7046 0.7538 bash bash (no symbols)
080c0f70 6966 0.7452 XFree86 XFree86 CheckPassiveGrabsOnWindow
00000000 6397 0.6844 vim vim (no symbols)
00032150 5843 0.6251 libkonsolepart.so kdeinit TEWidget::setImage(ca const*, int, int)
420745e0 4928 0.5272 libc-2.3.2.so cc1plus _int_malloc
0804c5a0 4462 0.4774 oprofiled oprofiled odb_insert
00009f30 4154 0.4444 libpthread-0.10.so lyx __pthread_alt_unlock
0003b990 4143 0.4432 libkonsolepart.so kdeinit TEScreen::getCookedImage()
c01163d0 3865 0.4135 vmlinux cc1plus mark_offset_tsc
c0155ef0 3800 0.4065 vmlinux bash do_wp_page
c0156b50 3734 0.3995 vmlinux cc1plus do_anonymous_page
00000000 3410 0.3648 libX11.so.6.2 kdeinit (no symbols)
0810c4ec 3323 0.3555 lyx lyx Paragraph::getFontSettings(BufferParams const&, int) const
...
Full output (compressed, 130Kb)
Symbol summary for a single application
$ opreport --exclude-dependent --demangle=smart --symbols `which lyx`
CPU: PIII, speed 863.195 MHz (estimated)
Counted CPU_CLK_UNHALTED events (clocks processor is not halted) with a unit mask of 0x00 (No unit mask) count 50000
vma samples % symbol name
081ec974 5016 8.5096 _Rb_tree<unsigned short, pair<unsigned short const, int>, unsigned short const>::find(unsigned short const&)
0810c4ec 3323 5.6375 Paragraph::getFontSettings(BufferParams const&, int) const
081319d8 3220 5.4627 LyXText::getFont(Buffer const*, Paragraph*, int) const
080e45d8 3011 5.1082 LyXFont::realize(LyXFont const&)
080e3d78 2623 4.4499 LyXFont::LyXFont()
081255a4 1823 3.0927 LyXText::singleWidth(BufferView*, Paragraph*, int, char) const
080e3cf0 1804 3.0605 operator==(LyXFont::FontBits const&, LyXFont::FontBits const&)
081128e0 1729 2.9332 Paragraph::Pimpl::getChar(int) const
081ed020 1380 2.3412 font_metrics::width(char const*, unsigned, LyXFont const&)
08110d60 1310 2.2224 Paragraph::getChar(int) const
081ebc94 1227 2.0816 qfont_loader::getfontinfo(LyXFont const&)
...
Full output (compressed, 16Kb)
Symbol summary for a single application including libraries
$ opreport --demangle=smart --symbols `which lyx`
CPU: PIII, speed 863.195 MHz (estimated)
Counted CPU_CLK_UNHALTED events (clocks processor is not halted) with a unit mask of 0x00 (No unit mask) count 50000
vma samples % image name symbol name
081ec974 5016 3.1034 lyx _Rb_tree<unsigned short, pair<unsigned short const, int>, unsigned short const>::find(unsigned short const&)
00009f30 4154 2.5701 libpthread-0.10.so __pthread_alt_unlock
0810c4ec 3323 2.0559 lyx Paragraph::getFontSettings(BufferParams const&, int) const
081319d8 3220 1.9922 lyx LyXText::getFont(Buffer const*, Paragraph*, int) const
080e45d8 3011 1.8629 lyx LyXFont::realize(LyXFont const&)
0000a120 2853 1.7652 libpthread-0.10.so __pthread_alt_lock
080e3d78 2623 1.6229 lyx LyXFont::LyXFont()
00069a10 2467 1.5263 libstdc++.so.5.0.1 string::find(char const*, unsigned, unsigned) const
0006a430 2274 1.4069 libstdc++.so.5.0.1 string::compare(char const*) const
4201e850 2169 1.3420 libc-2.3.2.so __GI_setlocale
4207d870 1982 1.2263 libc-2.3.2.so memcpy
...
Full output (compressed, 43Kb)
Image summary for a single application
$ opreport `which lyx`
CPU: PIII, speed 863.195 MHz (estimated)
Counted CPU_CLK_UNHALTED events (clocks processor is not halted) with a unit mask of 0x00 (No unit mask) count 50000
163209 100.0000 lyx
60213 36.8932 lyx
23881 14.6322 libc-2.3.2.so
21968 13.4600 libstdc++.so.5.0.1
13676 8.3794 libpthread-0.10.so
12988 7.9579 libfreetype.so.6.3.1
10375 6.3569 vmlinux
8599 5.2687 libqt-mt.so.3.0.5
4190 2.5673 ld-2.3.2.so
...
Full output (2Kb)
Call-graph output for a single application
$ opreport -cl --demangle=smart `which lyx`
...
-------------------------------------------------------------------------------
1 0.0275 lyx-qt LyXText::getColumnNearX(int, Row const&, int&, bool&) const
1 0.0275 lyx-qt LyXText::computeRowMetrics(int, Row const&) const
1 0.0275 lyx-qt numberOfSeparators(Paragraph const&, Row const&)
1 0.0275 lyx-qt DocIterator::forwardPos()
2 0.0550 lyx-qt Paragraph::isHfill(int) const
6 0.1650 lyx-qt DocIterator::forwardPar()
7 0.1925 lyx-qt RowPainter::paintText()
8 0.2200 lyx-qt Paragraph::isNewline(int) const
9 0.2475 lyx-qt RowPainter::singleWidth(int) const
28 0.7699 lyx-qt LyXText::redoParagraph(int)
29 0.7974 lyx-qt RowPainter::paintFromPos(int&)
47 1.2923 lyx-qt LyXText::singleWidth(Paragraph const&, int) const
128 3.5194 lyx-qt RowPainter::paintChars(int&, bool, bool)
141 3.8768 lyx-qt Paragraph::isSeparator(int) const
400 10.9981 lyx-qt Paragraph::isLineSeparator(int) const
420 11.5480 lyx-qt LyXText::setRowWidth(int, Row&) const
473 13.0052 lyx-qt LyXText::rowBreakPoint(int, Row&) const
1935 53.2032 lyx-qt Paragraph::isInset(int) const
446 1.0910 lyx-qt Paragraph::getChar(int) const
1609 44.2276 lyx-qt vector<char>::size() const
1113 30.5937 lyx-qt vector<char>::operator[](unsigned int) const
446 12.2595 lyx-qt Paragraph::getChar(int) const [self]
188 5.1677 lyx-qt vector<char>::begin() const
79 2.1715 lyx-qt vector<char>::const_iterator::difference_type __gnu_cxx::operator-<char const*, char const*, vector<char>>(vector<char>::const_iterator const&, vect
or::const_iterator const&)
74 2.0341 lyx-qt vector<char>::end() const
72 1.9791 lyx-qt vector<char>::const_iterator::operator+(int const&) const
57 1.5668 lyx-qt vector<char>::const_iterator::operator*() const
-------------------------------------------------------------------------------
...
Full output (compressed, 132Kb)
Annotate mixed source/assembly
$ opannotate --source --assembly `which oprofiled`
...
: index = hash->hash_base[odb_do_hash(hash, key)];
: while (index) {
1455 14.2689 : 804c5c2: test %eax,%eax
13 0.1275 : 804c5c4: je 804c5ef
: if (index <= 0 || index >= hash->descr->current_size) {
12 0.1177 : 804c5c6: mov 0x8(%esi),%ecx
2 0.0196 : 804c5c9: lea 0x0(%esi,1),%esi
19 0.1863 : 804c5d0: cmp 0x4(%ecx),%eax
78 0.7649 : 804c5d3: jae 804c638 <odb_insert+0x98>
: char * err_msg;
: asprintf(&err_msg, "odb_insert() invalid index %u\n",
: index);
: odb_set_error(hash, err_msg);
: return EXIT_FAILURE;
: }
: node = &hash->node_base[index];
12 0.1177 : 804c5d5: lea (%eax,%eax,2),%edx
41 0.4021 : 804c5d8: mov (%esi),%eax
: 804c5da: lea (%eax,%edx,4),%ebx
...
Full output (compressed, 24Kb)
Annotated source
$ opannotate --source --output-dir=/home/moz/src/annotated `which oprofiled`
$ vi /home/moz/src/annotated/home/moz/src/oprofile/daemon/opd_image.c # the annotated source output
...
:static uint64_t pop_buffer_value(struct transient * trans)
254 2.4909 :{ /* pop_buffer_value total: 2105 20.6433 */
: uint64_t val;
:
160 1.5691 : if (!trans->remaining) {
: fprintf(stderr, "BUG: popping empty buffer !\n");
: exit(EXIT_FAILURE);
: }
:
: val = get_buffer_value(trans->buffer, 0);
123 1.2062 : trans->remaining--;
65 0.6374 : trans->buffer += kernel_pointer_size;
: return val;
230 2.2556 :}
...
Full output (tarball, 24Kb)
Rules of Optimization: Rule 1: Don't do it. Rule 2 (for experts only): Don't do it yet.
- M.A. Jackson
2020/07/20