#!/usr/bin/env python
# vi: set tabstop=8 expandtab softtabstop=4 shiftwidth=4
"""
Copyright (c) 2015-2019 Duane Voth
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
# (that's the BSD 3-clause)
#
#
# Python GDB RDP client (replaces gdb for QEMU tcp debug)
#
# why:
# - gdb probably won't ever understand non-gas assemblers (like NASM)
# - gdb still doesn't deal with multiple cores (nicely? at all?)
# - gdb is seriously cumbersome, it's high time we raised the bar.
#
# resources:
# https://sourceware.org/gdb/onlinedocs/gdb/Remote-Protocol.html
#
# usage:
# $ qemu-system-i386 -s -S .... (opens a window (or not) and stops)
# (then from a separate terminal)
# $ python pgdb.py -nasmlst myasmcode.lst -objdump myccode.lst ...
# (args are read left to right so shell wildcards work)
# $ python pgdb.py -nasmlst src/{a,b,c}.lst -gccmap mapfiles/*.map
#
# ncurses issues:
# - TERM=rxvt-unicode-256color is the default for urxvt but the terminfo
# definition seems to be in many ways wrong - in pgdb the key codes
# are totally broken.
# a workaround is coded in below (look for os.environ['TERM'] = 'rxvt')
# or alternatively run pgdb as $ TERM=rxvt python pgdb.py ...
#
# how pgdb works:
# pgdb is an event driven ncurses interface to any gdb 'backend' tucked
# into a debugger or emulator. qemu (started above) with -s -S halts
# emulation before the first instruction is executed and allows a debugger
# to be attached (that's pgdb). when pgdb is loaded it looks for the
# gdb socket and initiates the RDP protocol to find out what cpu arch
# is running, the contents of all the registers, and then if a listing
# file, or map file with references to listings, has been mentioned on
# the pgdb command line, locates where the current program counter is
# within the listings.
#
# implementation notes:
# - apologies: this code is rather dense. the good news however is
# that it is actually pretty easy to debug: because pgdb runs
# independently of the gdb debugger/emulator, it can be exited
# and restarted without disturbing the debugger/emulator. thus, if
# pgdb just got its panties in a wad, you can exit pgdb with 'q'
# (if it hasn't already crashed), hunt around and add Log.write()
# statements to the .py, and then rerun the pgdb command line to
# see if you found the problem.
# - HOWEVER, if pgdb crashes after ncurses has been init'ed but before
# the Log window is up (typical for the convoluted load_gccmap_file
# function) you will want to enable 'logfile' below, let pgdb crash,
# and then study pgdb.DBG
# - i've saved some of my useful Log.write()s to help debug; search for
# 'DEBUG' to find useful statements that if uncommented might help.
# - pgdb is a purely event driven app. Keyboard events and gdb-rdp-tcp
# receive events drive ALL actions.
# - update_status('...', CPdbg) is a possible one-line debug mechanism
# that writes to the top line of the screen, or use
# Log.write('...\n', CPdbg) for multi-line.
# - gdb rdp (remote debug protocol) thinks in 'threads' (based from 1)
# but pgdb thinks in 'cpus' (based from 0).
# - the words 'panel' and 'window' seem to be used interchangeably,
# but no, windows are 'seen' by the user, panels are the software
# objects that often result in a window.
# - specific architectures have different names for the instruction
# pointer - but pgdb simply labels them all as 'ip'. sorry.
# - when using a lot of cpu cores, it can take a while to refetch all
# the regs via rdp after each single-step or emulator stop, and many
# display events don't fire until all the data has been retrieved.
# if you are running with more than 8 cores, be patient!
# - python exceptions go to the log window once the main loop runs.
# you can scroll back and forward with ctrl-pgup and ctrl-pgdown ...
# - my approach to tracking program flow (by doing text searches through
# .lst and .map files) is *not* deterministic and will undoubtedly
# give incorrect results in certain complex situations with overlapping
# logical address spaces. and there is some squirrely code to mitigate
# this of which I'm not entirely proud. but this approach allows us
# to more easily trace rom-able code where we may only have a binary
# image, a lst file, and maybe a map file. I will argue against trying
# to make pgdb handle elf executables and object files if it means
# breaking the current *text* lst/map file based approach. (but feel
# free to fork a "different" version ... ;)
# - perhaps why others have not tried this follow-the-listing-file approach
# is that the gcc tool chain (using objdump) does not seem to have well
# defined listing file formats. I wrote this initially for NASM et al.
# but of course the 800 pound gcc toolchain wants to play too - so I've
# hacked in support for gcc's objdump. at the moment the problem is
# that hand coded 'as' source generates listing files (via objdump)
# where source labels do not have a corresponding symbol in the symbol
# table (ie. you have to manually add .global in the .s to make a label
# a symbol) and without a symbol (that gets fixed up by the linker) we
# can't match the current ip with the source line that generated the
# next instruction.
# - "pinning" the source window is one workaround for overlapping address
# spaces. Pressing a number key twice pins a source window; pressing
# any other number key unpins the current source window and switches to
# the other. sometimes a very clear better choice presents itself,
# which will cause an automatic switch.
# - when generating .lst files using gcc and objdump, be sure to use:
# $ gcc -O0 ...
# - segment support may be questionable, but for non-segmented architectures,
# defining a segment to act like an overlay works fine, where each
# overlay gets a unique number. segments need to be supported because
# some popular architectures have them ...
# - qemu-system-arm -kernel (v2.4ish) for some reason reports the pc values
# to be *relative* to the start address at 0x10000. this messes
# everything up. for now, a *negative* segment offset brings the
# symbols into line - yeah it's a hack ...
# - nasm happily allows you to put data in your .text segment.
# pgdb however keeps symbols found in the .text segment separate from
# symbols found in the .data segment. text symbols work for breakpoint
# addresses, data symbols work for memory and watchpoint addresses.
# if at a mem address prompt you type gdt@mygdt and pgdb says 'error
# in [mygdt] at mygdt' then maybe you are missing a .data section.
# - yo bro, why isn't this verbiage all in some README?
# because my young padawan, the less your crap is spread out, the
# easier it is to clean up. (and the docs won't be so easily lost)
#
# ---- architecture support modules: alter egos and modes ----
# pgdb can do something a bit unusual: it can *reload* the arch module
# on the fly *if* the emulated processor transforms itself into some
# alter ego. this is what Arch, Arch_name, load_arch_module() are
# about. however, some processors let their cores run in different
# modes simultaneously (eg. x86). multiple *concurrent* cpu modes are
# handled by a single arch module; but if some configuration flag in a
# processor status word causes all the cores on the chip to change at
# once, then separate arch modules may be the best approach with an alter
# ego switch (eg. load_arch_module()) on the fly when the flag changes.
# pretty sure I haven't thought it all out let alone tested it ...
#
# confusing all this is how qemu is gradually evolving to usefulness.
# with qemu v3.1.0, x86 regs are finally sent to us via xml, but there
# is still no indication when an x86 cpu switches from 32b to 64b or
# back. there is probably some discussion among the devs about how
# to do this properly and efficiently; for example, it would be cool if
# qXfer:features:read could give us a unique arch string that we could
# simply map to a module name, but various qemu binaries don't support
# features:read yet. (and qemu has to deal with the multiple mode
# thing too - how is it going to telegraph that a new xml file needs
# to be retrieved?) right now, if features:read isn't supported, the
# only qemu signal that lets pgdb know what mode/architecture is being
# emulated is the LENGTH of the returned data for the rdp 'g' (get
# registers) command! (we pray that this will continue to be unique
# for each arch/alterego/mode) and thus the current pgdb logic is:
# - if qXfer:features:read, pick the arch module from the xml name
# else use the user defined default (or command line -arch)
# - the emulated processor mode is based on the returned
# 'g' reg set length and can change at any time for any core
#
# todo:
# - handle multiple breakpoints, let some remain active across continues.
# - better support for multiple memory windows: auto update, highlight in
# yellow which bytes have changed - should be able to embed the fancy
# \a \t control characters and use ccs() to make it a small bit simpler.
# - better expressions for memory addresses (allow more math and segment
# regs and do selector lookups for x86 protected mode)
# - add an 'x' command to modify memory ...
# - add an 'r' command to modify registers ...
# - properly fetch multiple memory regions with chained rdp fetches.
# this is being done currently but it is an accident that it sometimes
# works correctly -- what breaks is if you put up two mem windows with
# the same starting address (or try to display flat memory AND a data
# structure over the same region at the same time). multiple requests
# for memory region dumps cannot be distinguished, so funneling them
# all through the rdp interface causes downstream display events to
# get confused or lost. what is needed is a memory 'cache'. all
# memory windows should pool their requests and share their answers
# so overlapping regions don't cause multiple rdp fetches and one
# answer can update all interested memory windows. but this will be
# a significant enhancement ...
# - fetch complex data structures via rdp (ie. gdt/ldt/idt with multiple
# chained lookups). once the above is fixed, it would be nice to
# pull the segment selectors for each segment register in use by a cpu.
# - if terminal resize shrinks display and cuts off windows - they can
# no longer be moved. resize enlarge however won't unfreeze moves!
# (probably a curses bug - can we watch for resize events and relocate
# windows so they don't get cut off?) for now, us users shouldn't
# shrink the terminal window while pgdb is running ...
# - support for other architectures: mips, ppc, s390, etc. arm and aarch64
# are mostly solid - remember, pgdb is primarily a gdb rdp front end ...
# - finish abstracting out all the x86_32 specific code placing it in
# pgdb_x86_32.py (there is no need for classes here, python modules
# create a perfectly functional name space boundary which is all we need)
# - other architectures: x86_64, arm32, arm64, mips32, mips64, s390x, etc.
# names should match qemu build names (i386 and 64 need to be combined
# if qemu-x86_64 can switch midstream to 64bit - but I don't know yet
# when this happens - gdb has a 'set architecture i386:x86-64' command
# but it doesn't cause any rdp traffic)
# - add a general purpose disassembler per architecture so list/map/source
# are mostly optional
# - add a -structs command line flag to load 'application' data structures
# that are architecture independent. aka:
# $ pgdb -arch x86_64 -structs linux4.0.7,mydriver,yourapp
#
# future:
# - properly define source contexts and what they mean. as I've added
# features such as proper symbol support, the temporary work-arounds
# I used to support multiple source contexts have become strained.
# restricting symbol lookups to the source context in which they were
# defined is good in some cases, bad in others.
# ex: if there are no overlapping address spaces for all the code we
# are working on, then we of course want to be able to say 'show me
# mem addr X' or 'break at Y' and pgdb does the right thing regardless
# of which source file is being displayed. but if there are
# overlapping spaces, then which X or Y in what space do you mean?
# for multiple spaces, clearly, we have to remain source context
# sensitive. but then users have to select the correct source window
# before mem windows or breaks or watches can be set ...
# or maybe pgdb can prompt the user to change contexts when a symbol
# not defined in the current context exists in another?
# and shouldn't some memory windows be bound to the source contexts?
# if we are tracing through a kernel there are kernel data structures
# and there are user mode data structures - they shouldn't all be on
# the screen at once.
# - what is being said here for the objdump and the gcc tool chain?
# or for listing files in general? perhaps there is a use for an
# *integrated* listing/map file (and I don't mean that the symbols are
# simply listed at the end) - that is built from all the object-lst
# files and a map file - maybe a *single* text file detailing all
# post-link addresses, machine opcodes, and interlaced source code is
# a way to improve debugging productivity - as opposed to tools that
# try to integrate multiple files ...
# - of course, should pgdb get to the point where it needs to know which
# process in a multitasking OS is currently running, the gdb-rdp
# protocol needs to provide a new class of contextual information about
# the running OS which can be added to the cpu context (e.g. current
# process id), and then the pgdb command line can provide the proper
# source context for each process id ...
#
# gdb remote debug protocol changes that are BADLY needed:
# - the response string must include the command to which it is replying
# (solves race conditions for event driven designs that queue cmds)
# - so the 3.1.0 qemu finally has system_register support for aarch64 ...
# but not yet for x86 (sigh). anyway, rdp could also easily return
# the complete gdt/ldt descriptors for the segment regs in protected
# mode too (which would make our lives here in the debug front end
# world way easier)
#
# contributors:
# djv - Duane Voth
#
# history:
# 2015/10/12 - v0.05 - djv - released
# 2015/10/15 - v0.06 - djv - moved fads functions inside pgdb
# 2015/11/05 - v0.07 - djv - group cmdline files
# 2015/12/27 - v0.08 - djv - add cpu modes
# 2019/05/12 - v0.09 - djv - update for qemu 3.1.x (proper feature support)
Version = "PGDB v0.09 2019/05/12"
# We're fresh out of lines for the main help window - too many keys
# to document. (Note, we have to fit in 24 lines) I can't seem to
# get rid of the last blank line in ncurses windows either.
Help_text_main = \
""" h - toggles visibility of context sensitive help
l - toggles visibility of the log window
tab - rotates the active window
r - reorder windows (useful after resize)
<enter> - refresh window, if cpu make it active
<number> - select source window (twice to pin)(sh+N 11-20)
/ - text search source window (prompts for text)
n - next text search
b/w - set a breakpoint/watchpoint (prompts for addr)
v - clear all breakpoints and watchpoints
m - new memory window (prompts for address)
M - destroy active memory window
a - lookup a hex address in current source window
s/S - single step active cpu / all cpus
j/J - jump active cpu / all cpus to highlight addr
c/C - continue active cpu / all cpus
q/Q - quit pgdb / and kill qemu also
ctrl+arrows - move active window around screen
ctrl+space - raise active window to the top
ctrl+pageup/dn - scroll active window (log,mem only)
arrows,pageup,pagedn,home,end,bs - scroll source window"""
Help_text_breakpoints = \
"""
QEMU breakpoints are logical hex addresses (CS is not involved).
ESC aborts set breakpoint. A suggested breakpoint value is taken
from the address in the source window highlighted in white.
The white address is the next valid instruction pointer location
following the *focus point* on the source window (fixed at 3/4
the way down the screen). The address highlighted in yellow is
the current cpu's current IP."""
Help_text_mem_address = \
"""
Memory addresses can be simple expressions
with hex (only) constants, register names,
and * + or - (no parentheses are allowed):
ex: 40ac0
ebx + edi*2+3c
or of the form: <struct>@<addr>,<count> where
struct names are defined in the arch modules.
(count is again in hex)
ex: gdt@mygdt,8
"""
import os
import re
import sys
import string
import curses
import curses.panel
import socket
import asyncore
import traceback
import importlib
# setup Logfile support - a way to debug when ncurses isn't
# ready and printing to both stdout and stderr don't work.
# swap the 'logfile = open...' comment to enable:
logfile = None
#logfile = open('pgdb.DBG', 'w')
def Logfile(*args):
if logfile == None: return
for a in args:
logfile.write(str(a))
logfile.write('\n')
logfile.flush()
# make ctrl+pgup and ctrl+pgdn work properly for recent urxvt
if os.environ['TERM'] == 'rxvt-unicode-256color':
os.environ['TERM'] = 'rxvt'
Arch = None
Arch_name = 'x86' # neither qemu-i386 nor qemu-x86_64 offers any .xml
# files, nor does qemu-alpha or the sparcs - gees,
# put your fav default here!
Host_port = ('0.0.0.0', 1234)
# ----------------------------------------------------------------------------
# early command line processing
if '-h' in sys.argv or '--help' in sys.argv:
print('usage: python pgdb.py [-remote tcp::1234] [-nasmlst <file1>] [-objdump <file2>] ...')
print()
print(Help_text_main)
print()
print(Help_text_breakpoints)
print()
print(Help_text_mem_address)
sys.exit(0)
if '-remote' in sys.argv:
idx = sys.argv.index('-remote')
sys.argv.pop(idx)
remote = ''
try:
remote = sys.argv.pop(idx)
medium, host, port = remote.split(':')
if medium != 'tcp': raise Exception('only tcp supported so far')
if len(host) == 0: host = '0.0.0.0'
port = int(port)
Host_port = (host, port)
except:
print('bad -remote arg [%s]: %s' % (remote, sys.exc_info()[1]))
sys.exit(0)
if '-arch' in sys.argv:
idx = sys.argv.index('-arch')
sys.argv.pop(idx)
Arch_name = sys.argv.pop(idx)
def load_arch_module():
# don't call this before the Log window has been defined.
global Arch
if Arch:
return
fn = 'pgdb_' + Arch_name
try:
Arch = importlib.import_module(fn)
Log.write('Cpu architecture is ' + Arch.name + ' ', attr=CPok)
Log.write('(or this qemu is old and didn\'t report a cpu type)\n')
Arch.Log = Log
Arch.DSfns = DSfns
Arch.CPerr = CPerr
except:
Log.write('unable to load %s\n' % fn, CPerr)
Log.write('try: python %s alone to check for errors\n' % fn, CPerr)
Arch = FakeArch()
class FakeArch(object):
name = 'ERSATZ'
spec = {}
cpu_maxy = 2
cpu_maxx = 6
def generate_gspec(self, name, tree): pass # called if qXfer:features:read
def alter_ego(self, n): return None
def get_seg_register(self, x): return None
def get_ip_register(self, x): return None
def cpu_reg_update(self, regs): return []
# ----------------------------------------------------------------------------
# gdb client support
Gdbc = None
Breakpoints = {} # a list of active 'Z0,xxxx,1' breakpoint commands
Watchpoints = {} # a list of active 'Z2,xxxx,n' watchpoint commands
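# a minimal lifecycle sketch, assuming a breakpoint at the hypothetical
# address 0x7c00: setting it stores and queues 'Z0,7c00,1'; the
# delete_breakpoints() method below later rewrites each stored key to
# its lowercase remove form 'z0,7c00,1' and queues that instead.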
def lsn2msn(s):
# gdb 'g' register strings are sent least-significant-nibble first
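# e.g. (assuming a 32-bit register arriving as '78563412'):
#   lsn2msn('78563412') -> '12345678'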
rval = ''
for i in range(len(s), 0, -2):
rval += s[i-2:i]
return rval
def dumpmem(s, addr, wth=16):
# addr is an integer
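# returns classic hexdump lines; a small sketch:
#   dumpmem('48656c6c6f', 0x1000)
# yields one line starting '0x00001000 48 65 6c 6c 6f', with the hex
# column padded to wth*3 characters and the printable chars 'Hello' after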
rval = []
def prntabl(c):
return chr(c) if c >= 32 and c < 127 else '.'
def bytegen(s, n):
# yield 'n' formatted hex bytes and 'n' characters per iteration
for i in range(0, len(s), wth*2): # break into lines
seg = s[i:i+wth*2]
byts = []
chrs = ''
for j in range(0, len(seg), 2): # break into bytes
byts.append(seg[j:j+2])
chrs += prntabl(int(byts[-1], 16))
yield i//2, byts, chrs
for n, hexline, chrline in bytegen(s, wth):
rval.append('0x%08x %-*s %s' % (addr+n, wth*3, ' '.join(hexline),
chrline))
return rval
def parse_xml(data):
# because ElementTree is insane overkill.
# and yes this version is incomplete. some things missing:
# - text following sub tags
# - probably doesn't handle <!DOCTYPE ...> correctly
# generates lists of tuples with 5 elements per tuple:
# (tag, attrs, pre-text, subtree, post-text)
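# for example (a rough sketch of the shape, not a guaranteed contract):
#   parse_xml('<feature name="org.gnu.gdb.i386.core"><reg name="eax"/></feature>')
# roughly yields:
#   [('feature', ['name="org.gnu.gdb.i386.core"'], '',
#     [('reg', ['name="eax"'], '', [], '')], '')]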
# strip out comments
x = data.split('<!--')
if len(x) > 1:
data = x[0]
for y in x[1:]:
if y.find('-->') >= 0:
data += y.split('-->')[1]
else:
data += y
Log.write('## nocom ' + data + '\n')
tags = [t.rstrip().rstrip('>') for t in data.split('<')[1:]]
root = []
def parse_tags(tree, tags):
in_comment = False
while len(tags) > 0:
tag = tags[0] #.strip() if strip helps, xml is malformed
if tag[0] == '/':
# unwind recursion until matching tag is found
if len(tree) > 0 \
and tag[1:].lower().startswith(tree[-1][0].lower()):
tags.pop(0)
continue
return
elif tag.startswith('!--'):
in_comment = True
tags.pop(0)
subtree = []
text = ''
if tag.find('>') > 0:
# tag contains text
tag, text = tag.split('>', 1)
if tag[-1] == '/':
tag = tag[:-1] # standalone tag
elif in_comment and tag[-2:] == '--':
tag = tag[:-2] # standalone comment
in_comment = False
else:
# not a self-ending tag, recurse another level
parse_tags(subtree, tags)
parts = tag.split()
# tag contains only attributes
tree.append((parts[0], parts[1:], text, subtree, ''))
try: # comment out try/except to debug this ...
parse_tags(root, tags)
except:
Log.write('error parsing xml: %s\n' % traceback.format_exc(), CPerr)
return root
def search_xml(data, search_str):
# look for a specific tag
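# sketch (hypothetical filename): searching '<xi:include href="i386-32bit.xml"/>'
# for 'xi:include' returns [{'href': 'i386-32bit.xml'}]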
tags = [t.replace('>', ' ') for t in data.split('<')[1:]]
#Log.write('## tags ' + str(tags) + '\n')
rvals = []
for t in tags:
parts = t.split()
if parts[0] == search_str:
dct = {}
for p in parts[1:]:
if p.find('=') > 0:
if len(p.split('=')) > 0 and len(p.split('"')) > 1:
dct[p.split('=')[0]] = p.split('"')[1]
else:
Log.write('rdp xml format error: ' + str(p) + '\n', CPhi)
else:
dct[search_str] = p
rvals.append(dct)
return rvals
_feature_reads_to_process = []
class GdbClient(asyncore.dispatcher):
def __init__(self):
asyncore.dispatcher.__init__(self)
self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
self.connect(Host_port)
self.cmds = []
self.sbuf = ''
self.rbuf = ''
self.rchksm = ''
self.lastcmd = None
self.state = None
self.nthreads = 0
self._threads = []
self.current_thread = None # 1 based
self.stopped_thread = None # None = emulator running
# eventually support all this? qemu doesn't yet ...
#self.queue_cmd('qSupported:multiprocess+;xmlRegisters=i386;qRelocInsn+')
# initiate the startup sequence
self.queue_cmd('qSupported')
def handle_connect(self):
pass
def handle_close(self):
self.close()
def handle_error(self):
if str(sys.exc_info()[1]).find('[Errno 111]') >= 0:
Log.write('cannot connect to %s (Connection refused)' %
str(Host_port) + ' is qemu running with -s ?\n', CPerr)
else:
Log.write('GdbClient exception: %s\n' % traceback.format_exc(), CPerr)
def writable(self):
if len(self.sbuf) > 0:
return True
if self.state == None and self.lastcmd == None and len(self.cmds) > 0:
return True
return False
def handle_write(self):
if len(self.cmds) <= 0:
return
cmd = self.cmds.pop(0)
self.lastcmd = cmd
s = '$' + cmd + '#' + "%02x" % (sum([ord(c) for c in cmd]) & 0xff)
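# framing sketch: the single command 'g' goes out as '$g#67',
# where 0x67 is the 8-bit sum of the payload characters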
self.sbuf = s.encode('ascii')
#DEBUG Log.write('w-- ' + str(self.sbuf) + '\n')
sent = self.send(self.sbuf)
self.sbuf = self.sbuf[sent:]
def queue_cmd(self, cmd):
self.cmds.append(cmd)
def handle_read(self):
data = self.recv(8192).decode('ascii')
#DEBUG Log.write('r-- ' + data + '\n')
self.process_read(data)
def process_read(self, data):
# two nested state machines here,
# the 'outside' sm parses $...#..
# the 'inside' sm collects rbuf and rchksm strings
# lastcmd remembers the cmd that triggered the current response
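# e.g. a typical reply stream is '+$OK#9a': the '+' acks our send,
# then 'OK' is collected into rbuf and '9a' into rchksm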
for c in data:
if self.state == None:
# state is inactive - we are outside of a msg packet
if c == '$':
self.state = '$'
self.rbuf = ''
self.rchksm = ''
continue
elif c == '+':
#print('msg ok')
continue
elif c == '-':
# if over a serial line retransmit might be in order
update_status('**** transmission failure ****', CPerr)
continue
# else state is active - we are inside a msg packet
if self.state == '$':
if c == '#':
self.state = 'chksm0'
else:
# collect characters until checksum
self.rbuf += c
continue
if self.state == 'chksm0':
# first checksum character
self.rchksm += c
self.state = 'chksm1'
continue
if self.state == 'chksm1':
# second checksum character
self.rchksm += c
self.state = None
# checksum complete, validate
s = "%02x" % (sum([ord(d) for d in self.rbuf]) & 0xff)
if s != self.rchksm:
update_status('**** warning: recv checksum mismatch **** [%s,%s]' %
(s, self.rchksm), CPerr)
self.rchksm = ''
#Log.write('++ state ' + str(self.state) + ' [%s|%s|%s]' % (
# c, self.rbuf, self.rchksm))
# process rbuf
if not self.lastcmd or len(self.lastcmd) == 0:
update_status('++ rbuf[%s]' % self.rbuf, CPdbg)
pass
elif self.lastcmd == 'qSupported':
self.process_supported()
elif self.lastcmd.startswith('qXfer:features:read:'):
self.process_feature_read()
elif self.lastcmd[:5] in ['?', 's', 'c', 'vCont']:
self.process_stop()
elif self.lastcmd == 'g':
self.process_regs()
elif self.lastcmd[0] == 'm':
self.process_mem()
elif self.lastcmd == 'qC':
self.process_currentthread()
elif self.lastcmd.startswith('Hg'):
self.process_currentthread()
elif self.lastcmd == 'qfThreadInfo':
if self.process_threadinfo():
self.queue_cmd('qsThreadInfo')
elif self.lastcmd == 'qsThreadInfo':
if self.process_threadinfo():
self.queue_cmd('qsThreadInfo')
else:
if self.rbuf != 'OK':
Log.write('++ rdp response to [%s] is [%s]\n' % (
self.lastcmd, self.rbuf))
self.lastcmd = None
self.rbuf = ''
else:
update_status('++ stray recv char [%s]' % c, CPerr)
def process_supported(self):
Log.write('++ supported: ' + str(self.rbuf.split(';')) + '\n')
features = self.rbuf.split(';')
cmds = 0
for feature in features:
if feature == 'qXfer:features:read+':
# sweet, we can actually know what arch we need,
# ask for the xml
self.queue_cmd('qXfer:features:read:target.xml:0,ffb')
cmds += 1
else:
Log.write('feature: %s\n' % feature)
if cmds == 0:
# get the machine state now
self.queue_cmd('?') # triggers process_stop
# else let process_feature_read get the machine state
def process_feature_read(self):
global Arch_name, _feature_reads_to_process
reqfn = self.lastcmd.split(':')[3]
# get names of xml files available for the target
incs = search_xml(self.rbuf, 'xi:include')
if len(incs) > 0:
_feature_reads_to_process += [inc['href'] for inc in incs]
if reqfn == 'target.xml':
atags = search_xml(self.rbuf, 'architecture')
Log.write('## arch ' + str(atags) + '\n')
Arch_name = atags[0]['architecture']
# hacks for names I hope they will change ...
if Arch_name == 'i386:x86-64':
Arch_name = 'i386'
Log.write('## archname [%s]' % Arch_name + '\n')
load_arch_module()
#Log.write('## reqfn [%s]\n' % reqfn)
#Log.write('## includes ' + str(_feature_reads_to_process) + '\n')
# qemu 3.x
feature = search_xml(self.rbuf, 'feature')
if len(feature) > 0 and 'name' in feature[0]:
Arch.generate_gspec(feature[0]['name'], search_xml(self.rbuf, 'reg'))
#logs parse output ... tree = parse_xml(self.rbuf)
if len(_feature_reads_to_process) == 0:
# then get the machine state
self.queue_cmd('?') # triggers process_stop
else:
self.queue_cmd('qXfer:features:read:%s:0,ffb' % _feature_reads_to_process.pop(0))
def process_stop(self):
# GDB_SIGNAL_0 = 0,
# GDB_SIGNAL_INT = 2,
# GDB_SIGNAL_QUIT = 3,
# GDB_SIGNAL_TRAP = 5,
# GDB_SIGNAL_ABRT = 6,
# GDB_SIGNAL_ALRM = 14,
# GDB_SIGNAL_IO = 23,
# GDB_SIGNAL_XCPU = 24,
# GDB_SIGNAL_UNKNOWN = 143
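# a typical qemu stop reply looks like 'T05thread:01;' (sketch):
# signal 5 (TRAP), stopped thread 1; rbuf[3:] below yields 'thread:01;'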
# we're going to need the arch module now
load_arch_module()
self.delete_breakpoints()
reasons = self.rbuf[3:].split(';')
st = 'stopped:'
for reason in reasons:
if len(reason) > 0:
n, r = reason.split(':')
if n == 'thread':
th = int(r, 16)
self.stopped_thread = th
st += ' cpu%d' % (th-1)
else:
st += ' reason=' + reason
update_status(st, CPnrm)
# initiate reload of all cpu regs
self.queue_cmd('qfThreadInfo')
# refetch all the mem windows.
# yup, memory fetch requests interlaced with qfThreadInfo!
# yea, the qemu gdbstub seems to have no problem with this!
for mem in Mems:
mem.refetch()
def process_regs(self):
global Arch, Arch_name
# currently the only way to know qemu has switched cpu modes is by
# the length of the get register data! if the current module doesn't
# support the length we received and it offers an alternate, switch.
arch_spec_lens = Arch.spec.keys()
spec_len = len(self.rbuf)
if not spec_len in arch_spec_lens:
new_name = Arch.alter_ego(spec_len)
if new_name:
Arch = None
Arch_name = new_name
load_arch_module() # presto changeo ...
# blank all the cpu windows
for cpu in Cpus.keys():
y,x = Arch.spec[spec_len]['maxy'], Arch.spec[spec_len]['maxx']
Cpus[cpu].resize(y, x)
else:
err = '**** expected one of %s hex digits for %s\n' % (
str(arch_spec_lens), Arch.name)
err += '**** but received %d (ie. unknown cpu architecture)' % (spec_len)
update_status(err, CPerr)
Log.write(err.replace('-', '****\n****') + '\n', attr=CPerr)
return
th = self.current_thread
i = n = 0
newregs = {}
for spec in Arch.spec[spec_len]['gspec']:
if spec[2] <= spec_len:
val = lsn2msn(self.rbuf[spec[1]:spec[2]])
newregs[spec[0]] = int(val, 16)
# during the first pass, Cpus objects may not have been created
if not th-1 in Cpus.keys():
Cpus[th-1] = Cpu(th-1, spec_len)
Cpus[th-1].update(newregs, spec_len)
#Log.write('++++ newregs ' + str(newregs), CPdbg)
refresh_all()
# humm.... would like to fetch about 8 bytes of memory at the ip,
# but I'm not sure multiple rdp queued commands work asynchronously
#addr = Arch.compute_ip_address()
#self.queue_cmd('m%08x,8' % addr)
def process_mem(self):
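# lastcmd is the memory fetch we sent, e.g. 'm000b8000,40' (sketch):
# addr 0xb8000, length 0x40, with the hex byte data arriving in rbuf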
parts = self.lastcmd.split(',')
addr = int(parts[0].strip()[1:], 16)
length = int(parts[1], 16)
st = '++ mem data 0x%x' % addr
for mem in Mems:
if mem.addr == addr: # match data with mem panel
mem.update(self.rbuf, length)
st += ' updated!'
update_status(st, CPdbg)
def process_threadinfo(self):
global Reorder_cpus
if self.rbuf == 'l':
# no more threads/cpus
if Reorder_cpus: # first time?
reorder_cpu_panels(self.stopped_thread, self.nthreads)
Reorder_cpus = False
# restore stopped thread/cpu
# humm, self.stopped_thread can be None if user steps too fast?
if self.stopped_thread:
self.queue_cmd('Hg%02x' % self.stopped_thread)
# FIXME if pgdb can't pick the right source file, setting
# active obj here will override the users source file
# selection and piss them off ...
set_active_object(Cpus[self.stopped_thread-1])
if Active_src:
Active_src.center()
return False # no more thread/cpu data need to be fetched
# extract the thread number
th = int(self.rbuf[1:], 16)
if not th-1 in Cpus.keys():
Cpus[th-1] = Cpu(th-1, 0)
if not th-1 in self._threads:
self.nthreads += 1
self._threads.append(th-1)
self.queue_cmd('Hg%02x' % th)
self.queue_cmd('g') # re/populate regs for this cpu
return True # more threads/cpus might exist
def process_currentthread(self):
self.current_thread = int(self.lastcmd[2:], 16)
# def process_selectcpu(self):
# # 'C' command maybe ...
# if self.rbuf == "OK":
# self.current_thread = int(self.lastcmd[2])
# update_status('current thread now %d' % self.current_thread, CPdbg)
def single_step(self):
#self.queue_cmd('s') # old school gdb
cmd = 'vCont'
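# e.g. stepping only cpu0 sends 'vCont;s:01';
# stepping two cpus with no active cpu sends 'vCont;s:01;s:02'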
if Active_cpu:
# single-step the active cpu
# NOTE: qemu's gdbstub seems to be pretty cavalier with this
# command - often I see lots of cpus advance ...
cmd += ';s:%02x' % (Active_cpu.i+1)
else:
# single-step all cpus
for thread in range(self.nthreads):
cmd += ';s:%02x' % (thread+1)
self.queue_cmd(cmd)
def single_step_all(self):
cmd = 'vCont'
# single-step all cpus
for thread in range(self.nthreads):
cmd += ';s:%02x' % (thread+1)
self.queue_cmd(cmd)
def cont(self):
cmd = 'vCont'
if Active_cpu:
# continue the active cpu
cmd += ';c:%02x' % (Active_cpu.i+1)
else:
# continue all cpus
for thread in range(self.nthreads):
cmd += ';c:%02x' % (thread+1)
self.queue_cmd(cmd)
self.stopped_thread = None
def cont_all(self):
# continue all cpus
cmd = 'vCont'
for thread in range(self.nthreads):
cmd += ';c:%02x' % (thread+1)
self.queue_cmd(cmd)
self.stopped_thread = None
def delete_breakpoints(self):
global Breakpoints
for bp in Breakpoints.keys():
bp = bp.replace('Z', 'z')
self.queue_cmd(bp)
update_status('++ deleted %s' % bp, CPdbg)
Breakpoints = {}
def delete_watchpoints(self):
global Watchpoints
for bp in Watchpoints.keys():
bp = bp.replace('Z', 'z')
self.queue_cmd(bp)
update_status('++ deleted %s' % bp, CPdbg)
Watchpoints = {}
# ----------------------------------------------------------------------------
# Format Arbitrary Data Structure (fads) - support functions
#
# Sure, this could be a library ...
#
# Many programmers think nothing of adding external libraries to their code.
# But I hate the effect it has on end users. For every spider-web dependency
# we make users download, I'll bet we lose a big chunk of our audience - with each
# dependency! Yeah, it's geometric. So until fads makes it into the core
# python install(!), I'm refusing to make it a library. *You* can, of course
# if you want, but *I* will not require users to setup extra support libraries
# to make pgdb run. Besides, since pgdb can switch architecture modules out on
# the fly, FADS should only really be loaded once even though many of the
# modules use it. What follows are the member functions for the FADS system
# wrapped in a dictionary that I can export across modules.
#
# Note: it would be nice if I could include all the DS class constructors
# here but this opens a whole can of worms including a chicken-egg module
# load issue for which I currently don't see a clean solution. The classes
# must be available to the module *while* the architecture submodule is being
# imported ... create_module() doesn't seem to have a way to forward parts
# of the loader environment and make them visible to the loading environment.
# For now, the DS class constructors will have to be pasted into each arch
# module that uses them. (no library! users get to just copy
# pgdb.py and a few of the arch modules of their choice to a new location
# and start running without easy-install or pypi whatever...)
def ds_reconstruct_hex(data, build_list):
# this reconstructor operates on a hexadecimal string of data.
# (aka. it is tuned for gdb remote debug protocol data)
# bytes are encoded as [high-nibble,low-nibble] and
# are assembled from the lowest address to highest address.
# masks here are rounded up to a multiple of 4 bits.
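# small sketch (hypothetical build item with firstb=0, lastb=1,
# lshift=0, mask=0xffff): applied to data '3412...' the two bytes
# reassemble low-byte-first into 0x1234 and return ('1234', 0x1234)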
srval = ''
rval = 0
mask = 0
for bld in build_list:
val = ''
for byte in range(bld.firstb, bld.lastb+1):
val = data[byte*2:byte*2+2] + val
if bld.lshift % 4 != 0:
raise Exception('for hex reconstruction, lshift must be mult of 4')
try:
v = int(val, 16) << bld.lshift
except:
Log.write('+++ fail val=[%s]' % str(val), CPerr)
raise Exception('bad hex value: ' + str(val))
rval |= v
m = bld.mask << bld.lshift
mask |= m
srval = val + srval
# apply mask
return '%0*x' % (len('%x' % mask), rval & mask), rval & mask
#def ds_reconstruct_packed_struct(data, build_list):