diff --git a/examples/hello_world/hello_world.c b/examples/hello_world/hello_world.c new file mode 100644 index 00000000..6cba78c7 --- /dev/null +++ b/examples/hello_world/hello_world.c @@ -0,0 +1,34 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* + * This is a simple hello_world program made for educational purposes. + * Licensed under the CC-BY-SA 4.0 license. + * + * In order to compile the executable, run: + * + * gcc -o hello_world hello_world.c + * + */ + +int main(int argc, char* argv[]) +{ + const char *str1 = "Hello "; + const char *str2 = "world!"; + + size_t str1_size = strlen(str1); + size_t str2_size = strlen(str2); + + char *output = malloc(str1_size + str2_size + 1); + if (output) + { + strcpy(output, str1); + strcat(output, str2); + + puts(output); + free(output); + } + + return 0; +} diff --git a/src/analysis/calling_conventions.md b/src/analysis/calling_conventions.md index fcd7d617..4f72a817 100644 --- a/src/analysis/calling_conventions.md +++ b/src/analysis/calling_conventions.md @@ -5,16 +5,11 @@ It is used also as a guide for basic function prototype and type propagation. ``` [0x00000000]> afc? -Usage: afc[agl?] 
-| afc convention Manually set calling convention for current function -| afc Show Calling convention for the Current function -| afcr[j] Show register usage for the current function -| afca Analyse function for finding the current calling convention -| afcf[j] [name] Prints return type function(arg1, arg2...), see afij -| afck List SDB details of call loaded calling conventions -| afcl List all available calling conventions -| afco path Open Calling Convention sdb profile from the given path -| afcR Register telescoping using the calling conventions order +Usage: afc[lor] # Calling convention +| afc [] # Set/Get calling convention for current function +| afcl[j*kl] # List all available calling conventions +| afco # Open Calling Convention sdb profile from given path +| afcr[j] # Show register usage for the current function [0x00000000]> ``` @@ -22,25 +17,16 @@ To list all available calling conventions for current architecture using `afcl` ``` [0x00000000]> afcl -swift amd64 amd64syscall ms reg +swift ``` The default calling convention for a particular architecture/binary is defined with `analysis.cc` for user-mode calls and `analysis.syscc` for syscalls. 
-To display a function prototype of standard library functions you have the `afcf` command - -``` -[0x00000000]> afcf printf -int printf(const char *format) -[0x00000000]> afcf fgets -char *fgets(char *s, int size, FILE *stream) -``` - All this information is loaded via sdb under `/librz/analysis/d/cc-[arch]-[bits].sdb` ``` @@ -58,6 +44,7 @@ cc.ms.ret=rax `cc.x.argi=rax` is used to set the ith argument of this calling convention to register name `rax` -`cc.x.argn=stack` means that all the arguments (or the rest of them in case there was `argi` for any `i` as counting number) will be stored in the stack from left to right +`cc.x.argn=stack` means that all the arguments (or the rest of them in case there was `argi` for any `i` as +counting number) will be stored in the stack from left to right `cc.x.argn=stack_rev` same as `cc.x.argn=stack` except for it means argument are passed right to left diff --git a/src/analysis/code_analysis.md b/src/analysis/code_analysis.md index 486c2c4a..f2fa28c2 100644 --- a/src/analysis/code_analysis.md +++ b/src/analysis/code_analysis.md @@ -4,77 +4,94 @@ Code analysis is a common technique used to extract information from assembly co Rizin has different code analysis techniques implemented in the core and available in different commands. -As long as the whole functionalities of rizin are available with the API as well as using commands. This gives you the ability to implement your own analysis loops using any programming language, even with rizin one-liners, shellscripts, or analysis or core native plugins. - -The analysis will show up the internal data structures to identify basic blocks, function trees and to extract opcode-level information. - -The most common rizin analysis command sequence is `aa`, which stands for "analyze all". That all is referring to all symbols and entry-points. If your binary is stripped you will need to use other commands like `aaa`, `aab`, `aar`, `aac` or so. 
- -Take some time to understand what each command does and the results after running them to find the best one for your needs. - -``` -[0x08048440]> aa -[0x08048440]> pdf @ main - ; DATA XREF from 0x08048457 (entry0) -/ (fcn) fcn.08048648 141 -| ;-- main: -| 0x08048648 8d4c2404 lea ecx, [esp+0x4] -| 0x0804864c 83e4f0 and esp, 0xfffffff0 -| 0x0804864f ff71fc push dword [ecx-0x4] -| 0x08048652 55 push ebp -| ; CODE (CALL) XREF from 0x08048734 (fcn.080486e5) -| 0x08048653 89e5 mov ebp, esp -| 0x08048655 83ec28 sub esp, 0x28 -| 0x08048658 894df4 mov [ebp-0xc], ecx -| 0x0804865b 895df8 mov [ebp-0x8], ebx -| 0x0804865e 8975fc mov [ebp-0x4], esi -| 0x08048661 8b19 mov ebx, [ecx] -| 0x08048663 8b7104 mov esi, [ecx+0x4] -| 0x08048666 c744240c000. mov dword [esp+0xc], 0x0 -| 0x0804866e c7442408010. mov dword [esp+0x8], 0x1 ; 0x00000001 -| 0x08048676 c7442404000. mov dword [esp+0x4], 0x0 -| 0x0804867e c7042400000. mov dword [esp], 0x0 -| 0x08048685 e852fdffff call sym..imp.ptrace -| sym..imp.ptrace(unk, unk) -| 0x0804868a 85c0 test eax, eax -| ,=< 0x0804868c 7911 jns 0x804869f -| | 0x0804868e c70424cf870. mov dword [esp], str.Don_tuseadebuguer_ ; 0x080487cf -| | 0x08048695 e882fdffff call sym..imp.puts -| | sym..imp.puts() -| | 0x0804869a e80dfdffff call sym..imp.abort -| | sym..imp.abort() -| `-> 0x0804869f 83fb02 cmp ebx, 0x2 -|,==< 0x080486a2 7411 je 0x80486b5 -|| 0x080486a4 c704240c880. 
mov dword [esp], str.Youmustgiveapasswordforusethisprogram_ ; 0x0804880c -|| 0x080486ab e86cfdffff call sym..imp.puts -|| sym..imp.puts() -|| 0x080486b0 e8f7fcffff call sym..imp.abort -|| sym..imp.abort() -|`--> 0x080486b5 8b4604 mov eax, [esi+0x4] -| 0x080486b8 890424 mov [esp], eax -| 0x080486bb e8e5feffff call fcn.080485a5 -| fcn.080485a5() ; fcn.080484c6+223 -| 0x080486c0 b800000000 mov eax, 0x0 -| 0x080486c5 8b4df4 mov ecx, [ebp-0xc] -| 0x080486c8 8b5df8 mov ebx, [ebp-0x8] -| 0x080486cb 8b75fc mov esi, [ebp-0x4] -| 0x080486ce 89ec mov esp, ebp -| 0x080486d0 5d pop ebp -| 0x080486d1 8d61fc lea esp, [ecx-0x4] -\ 0x080486d4 c3 ret -``` - -In this example, we analyze the whole file (`aa`) and then print disassembly of the `main()` function (`pdf`). +All features of Rizin are available from the library APIs as well as from commands. +This gives you the ability to implement your own analysis loops using any programming language, even with Rizin +one-liners, shell scripts, or analysis/core native plugins. + +The analysis will show up the internal data structures to identify basic blocks, function trees and to extract +opcode-level information. + +The most common Rizin analysis sequence uses the **analyze all** command (`aa`), which examines all symbols and +entry-points. If the binary is stripped, using the advanced automated analysis `aaa` will be necessary; advanced analysis +steps can be invoked manually with commands like `aab`, `aar`, or `aac`. + +Let's analyze a simple [hello_world](https://github.com/rizinorg/book/tree/master/examples/hello_world) program: + +``` +[0x00001100]> aa +[x] Analyze all flags starting with sym. 
and entry0 (aa) + +[0x00001100]> pdf @ main + ; DATA XREF from entry0 @ 0x1118 +/ int main(int argc, char **argv, char **envp); +| ; arg int argc @ rdi +| ; arg char **argv @ rsi +| ; var int64_t var_48h @ stack - 0x48 +| ; var int64_t var_3ch @ stack - 0x3c +| ; var int64_t var_30h @ stack - 0x30 +| ; var int64_t var_28h @ stack - 0x28 +| ; var int64_t var_20h @ stack - 0x20 +| ; var int64_t var_18h @ stack - 0x18 +| ; var int64_t var_10h @ stack - 0x10 +| 0x000011e9 endbr64 +| 0x000011ed push rbp +| 0x000011ee mov rbp, rsp +| 0x000011f1 sub rsp, 0x40 +| 0x000011f5 mov dword [var_3ch], edi ; argc +| 0x000011f8 mov qword [var_48h], rsi ; argv +| 0x000011fc lea rax, [str.Hello] ; 0x2004 ; "Hello " +| 0x00001203 mov qword [var_30h], rax +| 0x00001207 lea rax, [str.world] ; 0x200b ; "world!" +| 0x0000120e mov qword [var_28h], rax +| 0x00001212 mov rax, qword [var_30h] +| 0x00001216 mov rdi, rax +| 0x00001219 call sym.imp.strlen ; sym.imp.strlen ; size_t strlen(const char *s) +| 0x0000121e mov qword [var_20h], rax +| 0x00001222 mov rax, qword [var_28h] +| 0x00001226 mov rdi, rax +| 0x00001229 call sym.imp.strlen ; sym.imp.strlen ; size_t strlen(const char *s) +| 0x0000122e mov qword [var_18h], rax +| 0x00001232 mov rdx, qword [var_20h] +| 0x00001236 mov rax, qword [var_18h] +| 0x0000123a add rax, rdx +| 0x0000123d add rax, 1 +| 0x00001241 mov rdi, rax +| 0x00001244 call sym.imp.malloc ; sym.imp.malloc ; void *malloc(size_t size) +| 0x00001249 mov qword [var_10h], rax +| 0x0000124d cmp qword [var_10h], 0 +| ,=< 0x00001252 je 0x1292 +| | 0x00001254 mov rdx, qword [var_30h] +| | 0x00001258 mov rax, qword [var_10h] +| | 0x0000125c mov rsi, rdx +| | 0x0000125f mov rdi, rax +| | 0x00001262 call sym.imp.strcpy ; sym.imp.strcpy ; char *strcpy(char *dest, const char *src) +| | 0x00001267 mov rdx, qword [var_28h] +| | 0x0000126b mov rax, qword [var_10h] +| | 0x0000126f mov rsi, rdx +| | 0x00001272 mov rdi, rax +| | 0x00001275 call sym.imp.strcat ; sym.imp.strcat ; char 
*strcat(char *s1, const char *s2) +| | 0x0000127a mov rax, qword [var_10h] +| | 0x0000127e mov rdi, rax +| | 0x00001281 call sym.imp.puts ; sym.imp.puts ; int puts(const char *s) +| | 0x00001286 mov rax, qword [var_10h] +| | 0x0000128a mov rdi, rax +| | 0x0000128d call sym.imp.free ; sym.imp.free ; void free(void *ptr) +| `-> 0x00001292 mov eax, 0 +| 0x00001297 leave +\ 0x00001298 ret +``` + +In this example, we analyze the whole file (via `aa`) and then print disassembly of the `main()` function (via `pdf`). The `aa` command belongs to the family of auto analysis commands and performs only the most basic -auto analysis steps. In rizin there are many different types of auto analysis commands with a +auto analysis steps. In Rizin there are many different types of auto analysis commands with a different analysis depth, including partial emulation: `aa`, `aaa`, `aab`, `aaaa`, ... There is also a mapping of those commands to the rizin CLI options: `rizin -A`, `rizin -AA`, and so on. It is common sense that completely automated analysis can produce non sequitur results, thus -rizin provides separate commands for the particular stages of the analysis allowing fine-grained +Rizin provides separate commands for the particular stages of the analysis allowing fine-grained control of the analysis process. Moreover, there is a treasure trove of configuration variables -for controlling the analysis outcomes. You can find them in `analysis.*` and `emu.*` -cfg variables' namespaces. +for controlling the analysis outcomes. You can find them via command `el` under `analysis.*` and `emu.*` +variables namespaces. ## Analyze functions @@ -83,40 +100,39 @@ One of the most important "basic" analysis commands is the set of `af` subcomman function or perform a completely manual one. ``` -[0x00000000]> af? 
-Usage: af -| af ([name]) ([addr]) analyze functions (start at addr or $$) -| afr ([name]) ([addr]) analyze functions recursively -| af+ addr name [type] [diff] hand craft a function (requires afb+) -| af- [addr] clean all function analysis data (or function at addr) -| afa analyze function arguments in a call (afal honors dbg.funcarg) -| afb+ fcnA bbA sz [j] [f] ([t]( [d])) add bb to function @ fcnaddr -| afb[?] [addr] List basic blocks of given function -| afbF([0|1]) Toggle the basic-block 'folded' attribute -| afB 16 set current function as thumb (change asm.bits) -| afC[lc] ([addr])@[addr] calculate the Cycles (afC) or Cyclomatic Complexity (afCc) -| afc[?] type @[addr] set calling convention for function -| afd[addr] show function + delta for given offset -| afF[1|0|] fold/unfold/toggle -| afi [addr|fcn.name] show function(s) information (verbose afl) -| afj [tableaddr] [count] analyze function jumptable -| afl[?] [ls*] [fcn name] list functions (addr, size, bbs, name) (see afll) -| afm name merge two functions -| afM name print functions map -| afn[?] name [addr] rename name for function at address (change flag too) -| afna suggest automatic name for current offset -| afo[?j] [fcn.name] show address for the function name or current offset -| afs[!] ([fcnsign]) get/set function signature at current address (afs! uses cfg.editor) -| afS[stack_size] set stack frame size for function at current address -| afsr [function_name] [new_type] change type for given function -| aft[?] type matching, type propagation -| afu addr resize and analyze function from current address until addr -| afv[absrx]? manipulate args, registers and variables in function -| afx list function references +[0x00001100]> af? +Usage: af[?] 
# Analyze Functions commands +| af [] # Analyze functions recursively (honors `analysis.calls`) +| afr [] # Analyze functions recursively +| af+ [] # Hand craft a function (requires `afb+`) +| af- # Delete function +| af-* # Delete all function analysis data +| afj # Analyze function jumptable +| afa # Analyze function arguments in a call +| afal # Analyze function arguments in a call (honors `dbg.funcarg`) +| afb[?] # Basic blocks commands +| afB # Set asm.bits for the current function +| afs[jb!r] # Function signatures commands +| afo[j] # Show address of current function +| afu # Resize and analyze function from current address until addr +| afx[j] # List function references +| afS # Set stack frame size for function at current address +| afv # Manipulate arguments/variables in a function +| afl[?] # List functions +| afi[j*is] # Show/edit function information +| afn[as] # Analyze function names +| aft # Type matching analysis for the function in current seek +| afM # Print functions map +| afm # Merge two functions +| afc[lor] # Calling convention +| afd # Show function + delta for given offset ``` + You can use `afl` to list the functions found by the analysis. -There are a lot of useful commands under `afl` such as `aflj`, which lists the function in JSON format and `aflm`, which lists the functions in the syntax found in makefiles. +There are a lot of useful commands under `afl` such as `aflj` (lists the function in JSON format; `j` is a common +command suffix to provide JSON output), `aflm` (lists the functions in the syntax found in makefiles), or +`afll` (lists all functions in verbose mode). There's also `afl=`, which displays ASCII-art bars with function ranges. @@ -128,7 +144,8 @@ For the semi-automatic, you can use `afm ` to merge the current f the one specified by name as an argument, `aff` to readjust the function after analysis changes or function edits, `afu
` to do the resize and analysis of the current function until the specified address. -Apart from those semi-automatic ways to edit/analyze the function, you can handcraft it in the manual mode with `af+` command and edit basic blocks of it using `afb` commands. +Apart from those semi-automatic ways to edit/analyze the function, you can handcraft it in the manual mode with +`af+` command and edit basic blocks of it using `afb` commands. Before changing the basic blocks of the function it is recommended to check the already presented ones: ``` @@ -142,7 +159,9 @@ Before changing the basic blocks of the function it is recommended to check the ``` ### Handcraft function -before starting, let's prepare a binary file first, for example: + +Before starting, let's prepare a binary file first, for example: + ```C int code_block() { @@ -154,9 +173,11 @@ int code_block() return result; } ``` -then compile it with `gcc -c example.c -m32 -O0 -fno-pie`, we will get the object file `example.o`. open it with rizin. -since we haven't analyzed it yet, the `pdf` command will not print out the disassembly here: +Then compile it with `gcc -c example.c -m32 -O0 -fno-pie`, we will get the object file `example.o`. Open it with rizin. + +Since we haven't analyzed it yet, the `pdf` command will not print out the disassembly here: + ``` $ rizin example.o [0x08000034]> pdf @@ -181,38 +202,45 @@ p: Cannot find function at 0x08000034 0x0800005c c3 ret ``` -our goal is to handcraft a function with the following structure + +Our goal is to handcraft a function with the following structure ![analyze_one](analyze_one.png) -create a function at 0x8000034 named code_block: +Create a function at 0x8000034 named code_block: + ``` [0x8000034]> af+ 0x8000034 code_block ``` -In most cases, we use jump or call instructions as code block boundaries. so the range of first block is from `0x08000034 push ebp` to `0x08000048 jmp 0x8000052`. -use `afb+` command to add it. 
+In most cases, we use jump or call instructions as code block boundaries. So the range of first block is from +`0x08000034 push ebp` to `0x08000048 jmp 0x8000052`. Use `afb+` command to add it. ``` [0x08000034]> afb+ code_block 0x8000034 0x800004a-0x8000034 0x8000052 ``` -note that the basic syntax of `afb+` is `afb+ function_address block_address block_size [jump] [fail]`. the final instruction of this block points to a new address(jmp 0x8000052), thus we add the address of jump target (0x8000052) to reflect the jump info. +Note that the basic syntax of `afb+` is `afb+ function_address block_address block_size [jump] [fail]`. +The final instruction of this block points to a new address (jmp 0x8000052), thus we add the address of jump target +(0x8000052) to reflect the jump info. -the next block (0x08000052 ~ 0x08000056) is more likely an if conditional statement which has two branches. It will jump to 0x800004a if `jle-less or equal`, otherwise (the fail condition) jump to next instruction -- 0x08000058.: +The next block (0x08000052 ~ 0x08000056) is more likely an if conditional statement which has two branches. It will +jump to 0x800004a if `less or equal`, otherwise (the fail condition) jump to next instruction - 0x08000058: ``` [0x08000034]> afb+ code_block 0x8000052 0x8000058-0x8000052 0x800004a 0x8000058 ``` -follow the control flow and create the remaining two blocks (two branches) : +Follow the control flow and create the remaining two blocks (two branches): + ``` [0x08000034]> afb+ code_block 0x800004a 0x8000052-0x800004a 0x8000052 [0x08000034]> afb+ code_block 0x8000058 0x800005d-0x8000058 ``` -check our work: +Check our work: + ``` [0x08000034]> afb 0x08000034 0x0800004a 00:0000 22 j 0x08000052 @@ -224,9 +252,12 @@ check our work: ![handcraft_one](handcraft_one.png) -There are two very important commands for this: `afc` and `afB`. The latter is a must-know command for some platforms like ARM. 
It provides a way to change the "bitness" of a particular function by allowing to select between ARM and Thumb modes. +There are two very important commands for this: `afc` and `afB`. The latter is a must-know command for some platforms +like ARM. It provides a way to change the "bitness" of a particular function by allowing you to select between ARM and +Thumb modes. -`afc` on the other side, allows to manually specify function calling convention. You can find more information on its usage in [calling_conventions](calling_conventions.md). +`afc`, on the other hand, allows you to manually specify the function calling convention. You can find more information on its +usage in [calling_conventions](calling_conventions.md). ## Recursive analysis @@ -243,28 +274,22 @@ wide choice of manual references' creation of any kind. For this fine-grained co you can use `ax` commands. ``` -Usage: ax[?d-l*] # see also 'afx?' -| ax list refs -| ax* output rizin commands -| ax addr [at] add code ref pointing to addr (from curseek) -| ax- [at] clean all refs/refs from addr -| ax-* clean all refs/refs -| axc addr [at] add generic code ref -| axC addr [at] add code call ref -| axg [addr] show xrefs graph to reach current function -| axg* [addr] show xrefs graph to given address, use .axg*;aggv -| axgj [addr] show xrefs graph to reach current function in json format -| axd addr [at] add data ref -| axq list refs in quiet/human-readable format -| axj list refs in json format -| axF [flg-glob] find data/code references of flags -| axm addr [at] copy data/code references pointing to addr to also point to curseek (or at) -| axt [addr] find data/code references to this address -| axf [addr] find data/code references from this address -| axv [addr] list local variables read-write-exec references -| ax. [addr] find data/code references from and to this address -| axff[j] [addr] find data/code references from this function -| axs addr [at] add string ref +[0x00001100]> ax? +Usage: ax[?] 
# Cross references (xrefs) +| ax # Add custom xref to addr from current seek +| axc # Add generic code xref to addr from current seek +| axC # Add call code xref to addr from current seek +| axd # Add data xref to addr from current seek +| axs # Add string xref to addr from current seek +| axl[j*q] # List all xrefs +| axt[j*q] # List xrefs to current seek +| axf[j*q] # List xrefs from current seek +| axtg # Display commands to generate graphs according to xrefs +| ax- [] # Delete xrefs to addr +| ax-* # Delete all xrefs +| axm # Copy xrefs pointing to addr to also point to curseek +| axg[j*] # Show xrefs graph to reach function at current seek + ``` The most commonly used `ax` commands are `axt` and `axf`, especially as a part of various rz-pipe @@ -289,29 +314,30 @@ sub.strlen_d50 0x5de0 [STRING] lea rcx, str.02x (nofunc) 0x17838 [CODE] jae str.02x ``` -There are also some useful commands under `axt`. Use `axtg` to generate rizin commands which will help you to create graphs according to the XREFs. +There are also some useful commands under `axt`. Use `axtg` to generate rizin commands which will help you to create +graphs according to the XREFs. ``` -[0x08048320]> s main -[0x080483e0]> axtg -agn 0x8048337 "entry0 + 23" -agn 0x80483e0 "main" -age 0x8048337 0x80483e0 +[0x00001100]> s main +[0x000011e9]> axtg +agn 0x1118 "entry0 + 24" +agn 0x11e9 "main" +age 0x1118 0x11e9 ``` Use `axt*` to split the rizin commands and set flags on those corresponding XREFs. -Also under `ax` is `axg`, which finds the path between two points in the file by showing an XREFs graph to reach the location or function. For example: +Also under `ax` is `axg`, which finds the path between two points in the file by showing an XREFs graph to reach +the location or function. 
For example: ``` -:> axg sym.imp.printf -- 0x08048a5c fcn 0x08048a5c sym.imp.printf - - 0x080483e5 fcn 0x080483e0 main - - 0x080483e0 fcn 0x080483e0 main - - 0x08048337 fcn 0x08048320 entry0 - - 0x08048425 fcn 0x080483e0 main +[0x000011e9]> axg +- 0x000011e9 fcn 0x000011e9 main + - 0x00001118 fcn 0x00001100 entry0 ``` -Use `axg*` to generate rizin commands which will help you to create graphs using `agn` and `age` commands, according to the XREFs. + +Use `axg*` to generate rizin commands which will help you to create graphs using `agn` and `age` commands, +according to the XREFs. Apart from predefined algorithms to identify functions there is a way to specify a function prelude with a configuration option `analysis.prelude`. For example, like @@ -341,15 +367,15 @@ There are different kinds of configuration options: The two most commonly used options for changing the behavior of control flow analysis in rizin are `analysis.hasnext` and `analysis.jmp.after`. The first one allows forcing rizin to continue the analysis after the end of the function, even if the next chunk of the code wasn't called anywhere, thus -analyzing all of the available functions. The latter one allows forcing rizin to continue +analyzing all the available functions. The latter one allows forcing rizin to continue the analysis even after unconditional jumps. In addition to those we can also set `analysis.jmp.indir` to follow the indirect jumps, continuing analysis; `analysis.pushret` to analyze `push ...; ret` sequence as a jump; `analysis.nopskip` to skip the NOP sequences at a function beginning. -For now, rizin also allows you to change the maximum basic block size with `analysis.bb.maxsize` option -. The default value just works in most use cases, but it's useful to increase that for example when +For now, rizin also allows you to change the maximum basic block size with `analysis.bb.maxsize` option. 
+The default value just works in most use cases, but it's useful to increase that for example when dealing with obfuscated code. Beware that some of the basic block control options may disappear in the future in favor of more automated ways to set those. @@ -377,13 +403,16 @@ There are a few options for this: - `analysis.limits` - enables the range limits for analysis operations - `analysis.from` - starting address of the limit range - `analysis.to` - the corresponding end of the limit range -- `analysis.in` - specify search boundaries for analysis. You can set it to `io.maps`, `io.sections.exec`, `dbg.maps` and many more. For example: - - To analyze a specific memory map with `analysis.from` and `analysis.to`, set `analysis.in = dbg.maps`. +- `analysis.in` - specify search boundaries for analysis. You can set it to `io.maps`, `bin.sections`, `dbg.maps` + and many more. For example: + - To analyze a specific memory map with `analysis.from` and `analysis.to`, set `analysis.in=dbg.maps`. - To analyze in the boundaries set by `analysis.from` and `analysis.to`, set `analysis.in=range`. - - To analyze in the current mapped segment or section, you can put `analysis.in=bin.segment` or `analysis.in=bin.section`, respectively. + - To analyze in the current mapped segment or section, you can put `analysis.in=bin.segment` or + `analysis.in=bin.section`, respectively. - To analyze in the current memory map, specify `analysis.in=dbg.map`. - To analyze in the stack or heap, you can set `analysis.in=dbg.stack` or `analysis.in=dbg.heap`. - - To analyze in the current function or basic block, you can specify `analysis.in=analysis.fcn` or `analysis.in=analysis.bb`. + - To analyze in the current function or basic block, you can specify `analysis.in=analysis.fcn` or + `analysis.in=analysis.bb`. Please see `e analysis.in=??` for the complete list. @@ -391,7 +420,7 @@ Please see `e analysis.in=??` for the complete list. 
Jump tables are one of the trickiest targets in binary reverse engineering. There are hundreds of different types, the end result depending on the compiler/linker and LTO stages of optimization. -Thus rizin allows enabling some experimental jump tables detection algorithms using `analysis.jmp.tbl` +Thus, Rizin allows enabling some experimental jump tables detection algorithms using `analysis.jmp.tbl` option. Eventually, algorithms moved into the default analysis loops once they start to work on every supported platform/target/test-case. Two more options can affect the jump tables analysis results too: @@ -403,7 +432,7 @@ Two more options can affect the jump tables analysis results too: There are two common problems when analyzing embedded targets: ARM/Thumb detection and MIPS GP value. In the case of ARM binaries rizin supports some auto-detection of ARM/Thumb mode switches, but -beware that it uses partial ESIL emulation, thus slowing the analysis process. If you will not +beware that it uses partial ESIL emulation, thus slowing the analysis process. If you do not like the results, particular functions' mode can be overridden with `afB` command. The MIPS GP problem is even trickier. It is basic knowledge that GP value can be different not only @@ -415,7 +444,7 @@ future in favor of more automated analysis. ## Visuals -One of the easiest way to see and check the changes of the analysis commands and variables +One of the easiest ways to see and check the changes of the analysis commands and variables is to perform scrolling in a `Vv` special visual mode, allowing functions preview: ![vv](code_analysis_vv.png) @@ -436,50 +465,65 @@ to override some basic opcode or meta-information properties, or even to rewrite string. These commands are located under `ah` namespace: ``` -Usage: ah[lba-] Analysis Hints -| ah? show this help -| ah? offset show hint of given offset -| ah list hints in a human-readable format -| ah. 
list hints in human-readable format from current offset -| ah- remove all hints -| ah- offset [size] remove hints at the given offset -| ah* offset list hints in rizin commands format -| aha ppc @ 0x42 force arch ppc for all address >= 0x42 or until the next hint -| aha 0 @ 0x84 disable the effect of arch hints for all address >= 0x84 or until the next hint -| ahb 16 @ 0x42 force 16bit for all address >= 0x42 or until the next hint -| ahb 0 @ 0x84 disable the effect of bits hints for all address >= 0x84 or until the next hint -| ahc 0x804804 override call/jump address -| ahd foo a0,33 replace opcode string -| ahe 3,eax,+= set vm analysis string -| ahf 0x804840 override fallback address for call -| ahF 0x10 set stackframe size at the current offset -| ahh 0x804840 highlight this address offset in disasm -| ahi[?] 10 define numeric base for immediates (2, 8, 10, 10u, 16, i, p, S, s) -| ahj list hints in JSON -| aho call change opcode type (see aho?) (deprecated, moved to "ahd") -| ahp addr set pointer hint -| ahr val set hint for the return value of a function -| ahs 4 set opcode size=4 -| ahS jz set asm.syntax=jz for this opcode -| aht [?] Mark immediate as a type offset (deprecated, moved to "aho") -| ahv val change opcode's val field (useful to set jmptbl sizes in jmp rax) -``` - -One of the most common cases is to set a particular numeric base for immediates: +[0x00001100]> ah? 
+Usage: ah # Analysis hints +| ahl[j*] # List all analysis hints +| ahl.[j*] # List analysis hints at current seek +| ah- [] # Delete analysis hints in region starting from current seek +| ah-* # Delete all analysis hints +| aha # Set arch hint +| aha- # Delete arch hint +| ahb # Set bits hint +| ahb- # Delete bits hint +| ahh # Set highlight hint +| ahh- # Delete highlight hint +| ahc # Set jump/call address hint +| ahc- # Delete jump/call address hint +| ahe # Set ESIL string hint +| ahe- # Delete ESIL string hint +| ahd # Set opcode hint +| ahd- # Delete opcode hint +| ahs # Set opcode size hint +| ahs- # Delete opcode size hint +| ahf # Set fallback address hint +| ahf- # Delete fallback address hint +| ahF # Set stackframe size hint +| ahF- # Delete stackframe size hint +| ahS # Set asm syntax hint +| ahS- # Delete asm syntax hint +| ahp # Set pointer hint +| ahp- # Delete pointer hint +| ahr # Set function return value hint +| ahr- # Delete function return value hint +| ahv # Set opcode value hint +| ahv- # Delete opcode value hint +| aho # Set opcode type hint +| aho- # Delete opcode type hint +| ahi [] # Set immediate base hint +| ahi- # Delete immediate base hint +| aht # Set structure offset hint +| aht- # Delete structure offset hint +| ahts # List all matching structure offsets +``` + +One of the most common cases is to set a particular numeric base for immediates: ``` [0x00003d54]> ahi? 
-Usage: ahi [2|8|10|10u|16|bodhipSs] [@ offset] Define numeric base -| ahi set numeric base (2, 8, 10, 16) -| ahi 10|d set base to signed decimal (10), sign bit should depend on receiver size -| ahi 10u|du set base to unsigned decimal (11) -| ahi b set base to binary (2) -| ahi o set base to octal (8) -| ahi h set base to hexadecimal (16) -| ahi i set base to IP address (32) -| ahi p set base to htons(port) (3) -| ahi S set base to syscall (80) -| ahi s set base to string (1) +Usage: ahi [] # Set immediate base hint +| ahi # Set numeric (2, 8, 10, 16) +| ahi 10|d # Set base to signed decimal (10), sign bit should depend on receiver size +| ahi 10u|du # Set base to unsigned decimal (11) +| ahi b # Set base to binary (2) +| ahi o # Set base to octal (8) +| ahi h # Set base to hexadecimal (16) +| ahi i # Set base to IP address (32) +| ahi p # Set base to htons(port) (3) +| ahi S # Set base to syscall (80) +| ahi s # Set base to string (1) + +Set base of the N-th immediate (indexing starts from 0): +| ahi 16 1 # Set base of the 1-st immediate to hexadecimal [0x00003d54]> pd 2 0x00003d54 0583000000 add eax, 0x83 @@ -505,7 +549,7 @@ which can be checked with `ah` command: ``` Sometimes we need to override jump or call address, for example in case of tricky -relocation, which is unknown for rizin, thus we can change the value manually. +relocation, which is unknown for Rizin, thus we can change the value manually. The current analysis information about a particular opcode can be checked with `ao` command. We can use `ahc` command for performing such a change: diff --git a/src/analysis/cpu_platform_profiles.md b/src/analysis/cpu_platform_profiles.md index 851217e3..64c92e42 100644 --- a/src/analysis/cpu_platform_profiles.md +++ b/src/analysis/cpu_platform_profiles.md @@ -5,7 +5,8 @@ The computer ecosystem, especially in embedded systems is vast and growing and is thus diverse and is full of trivial differences. 
CPUs and development boards differ by minor and sometimes large differences in their design, ports, MMIO registers and other peripherals. Rizin handles these -differences by storing the data regarding each CPUs and platforms in [SDB](https://book.rizin.re/basic_commands/sdb.html?highlight=SDB#sdb) files in a standard format, instead of hardcoding them with each of the +differences by storing the data regarding each CPU and platform in [SDB](../basic_commands/sdb.md) +files in a standard format, instead of hardcoding them with each of the disassembler plugins. This information will be parsed and added as flags and comments during the analysis loop and will show up in the disassembly and other places, making reverse engineering on those particular chips ets is much easier. This also helps in easy addition of a new port, in maintenance and in user-friendliness. @@ -15,9 +16,9 @@ chips ets is much easier. This also helps in easy addition of a new port, in mai ### CPU profiles All the specifics pertaining to a CPU is written down in a CPU profile. It is designed in a way that allows you -to enter CPU specefic values like: size of the RAM (`RAM_SIZE`), size of the ROM (`ROM_SIZE`) and many more. -CPU profiles can be selected using the configuration variable `asm.cpu`. Firstly, Rizin checks whether the -a CPU profile exists for the selected CPU and architecture. If it exists, Rizin generates the filepath of the +to enter CPU-specific values like: size of the RAM (`RAM_SIZE`), size of the ROM (`ROM_SIZE`) and many more. +CPU profiles can be selected using the configuration variable `asm.cpu`. Firstly, Rizin checks whether the CPU profile +exists for the selected CPU and architecture. If it exists, Rizin generates the filepath of the profile and gets to a stage where it's ready to be loaded up. During analysis (`aa`), it's loaded up and the values are parsed and handled. CPU profiles also allow you to add information regarding the IO and extended IO registers of a CPU.
The information pertaining to the IO and extended IO registers are added as flags at their corresponding offsets. @@ -37,10 +38,11 @@ PORTB=io ``` Here, `PINB` is the name and `io` is the type of the port and this will be added as a flag at the offset `0x03`. The type can be `ext_io` if it's an extended IO register, as well. Both will be added -as [flags](https://book.rizin.re/basic_commands/flags.html?highlight=flags#flags) and the only difference between -them is that they will be added in different [flagspaces](https://book.rizin.re/refcard/intro.html?highlight=flagspa#flagspaces). +as [flags](../basic_commands/flags.md) and the only difference between them is that they will be added in different +[flagspaces](../refcard/intro.md). -CPU profiles also support mapping the ROM. According the `ROM_ADDRESS` and `ROM_SIZE`, a [section](https://book.rizin.re/basic_commands/sections.html?highlight=section#sections) named `.rom` will be added during analysis. +CPU profiles also support mapping the ROM. According to the `ROM_ADDRESS` and `ROM_SIZE`, +a [section](../basic_commands/sections.md) named `.rom` will be added during analysis. #### Adding CPU profiles @@ -53,12 +55,13 @@ by the variable `cpus` in the corresponding architecture's disassembler plugin ( the SDB file in the directory, add the entry in the `meson.build` of the same directory and build again. Choose the right CPU and architecture and analyze again (`aa`) to load up the CPU profile. -For reference, you can see the previously added CPU profile of ATmega16 here: [librz/asm/cpus/avr-ATmega16.sdb.txt](https://github.com/rizinorg/rizin/blob/dev/librz/asm/cpus/avr-ATmega16.sdb.txt). +For reference, you can see the previously added CPU profile of ATmega16 here: +[librz/asm/cpus/avr-ATmega16.sdb.txt](https://github.com/rizinorg/rizin/blob/dev/librz/asm/cpus/avr-ATmega16.sdb.txt). 
### Platform profiles Platform profiles are designed with an idea to support adding information that is pertaining to a specific board -or a micro controller. For example, most of the Raspberry Pi-s use a specific Broadcom chip and its peripherals +or a microcontroller. For example, most of the Raspberry Pi-s use a specific Broadcom chip and its peripherals like registers and interrupts will be the same for all Raspberry Pi-s. These profiles can be selected using the configuration variable `asm.platforms` and is loaded during analysis (`aa`). If you run `e asm.platform=?`, you can see the supported platforms by the selected architecture and CPU. @@ -75,7 +78,8 @@ AUX_MU_IO_REG.address=0x7e215040 AUX_MU_IO_REG.comment=Mini UART I/O Data ``` -Just like in CPU profiles, the `name` will be added as a flag and the `comment` as a [comment](https://book.rizin.re/disassembling/adding_metadata.html?highlight=Comment#adding-metadata-to-disassembly) (`CCu`). +Just like in CPU profiles, the `name` will be added as a flag and the `comment` as a +[comment](../disassembling/adding_metadata.md) (`CCu`). #### Adding platform profiles @@ -87,4 +91,5 @@ the `RzAsmPlugin` definition corresponding architecture's disassembler plugin. Y CPU is it's not already added. Then, add the entry in the `meson.build` of the same directory and build again. Choose the right CPU, architecture and platform and analyze again (`aa`). -You can also check out the platform profiles that were previously added at [librz/asm/platforms/arm-arm1176-bcm2835.sdb.txt](https://github.com/rizinorg/rizin/blob/dev/librz/asm/platforms/arm-arm1176-bcm2835.sdb.txt). +You can also check out the platform profiles that were previously added at +[librz/asm/platforms/arm-arm1176-bcm2835.sdb.txt](https://github.com/rizinorg/rizin/blob/dev/librz/asm/platforms/arm-arm1176-bcm2835.sdb.txt). 
diff --git a/src/analysis/emulation.md b/src/analysis/emulation.md index e429a716..2b5d8990 100644 --- a/src/analysis/emulation.md +++ b/src/analysis/emulation.md @@ -5,14 +5,14 @@ the core difference between static analysis and dynamic analysis. As many alread know, static analysis suffers from the path explosion problem, which is impossible to solve even in the most basic way without at least a partial emulation. -Thus many professional reverse engineering tools use code emulation while -performing an analysis of binary code, and rizin is no different here. +Thus, many professional reverse engineering tools use code emulation while +performing an analysis of binary code, and Rizin is no different here. -For partial emulation (or imprecise full emulation) rizin uses its own -[ESIL](../disassembling/esil.md) intermediate language and virtual machine. +For partial emulation (or imprecise full emulation) Rizin uses its own RzIL intermediate language, designed +to replace current [ESIL](../disassembling/esil.md). Rizin supports this kind of partial emulation for all platforms that -implement ESIL uplifting (x86/x86_64, ARM, arm64, MIPS, PowerPC, SPARC, AVR, 8051, Gameboy, ...). +implement ESIL uplifting (x86/x86_64, ARM, arm64, MIPS, PowerPC, SPARC, AVR, 8051, Game Boy, ...). One of the most common usages of such emulation is to calculate indirect jumps and conditional jumps. @@ -66,7 +66,7 @@ To manually setup the ESIL imprecise emulation you need to run this command sequ - a sequence of `aer` commands to set the initial register values. While performing emulation, please remember, that ESIL VM cannot emulate external calls -or system calls, along with SIMD instructions. Thus the most common scenario is to +or system calls, along with SIMD instructions. Thus, the most common scenario is to emulate only a small chunk of the code, like encryption/decryption, unpacking or calculating something. 
@@ -79,7 +79,7 @@ The commands interface for ESIL VM is almost identical to the debugging one: - `aesue ` to step until some specified ESIL expression met - `aec` to continue until break (Ctrl-C), this one is rarely used though, due to the omnipresence of external calls -In visual mode, all of the debugging hotkeys will work also in ESIL emulation mode. +In visual mode, all the debugging hotkeys will work also in ESIL emulation mode. Along with usual emulation, there is a possibility to record and replay mode: diff --git a/src/analysis/graphs.md b/src/analysis/graphs.md index ce84d180..9047bc41 100644 --- a/src/analysis/graphs.md +++ b/src/analysis/graphs.md @@ -1,6 +1,7 @@ # Graph commands -When analyzing data it is usually handy to have different ways to represent it in order to get new perspectives to allow the analyst to understand how different parts of the program interact. +When analyzing data it is usually handy to have different ways to represent it in order to get new perspectives to +allow the analyst to understand how different parts of the program interact. Representing basic block edges, function calls, string references as graphs show a very clear view of this information. @@ -8,91 +9,93 @@ Rizin supports various types of graph available through commands starting with ` ``` [0x00005000]> ag? -|Usage: ag [addr] -| Graph commands: -| aga[format] Data references graph -| agA[format] Global data references graph -| agc[format] Function callgraph -| agC[format] Global callgraph -| agd[format] [fcn addr] Diff graph -| agf[format] Basic blocks function graph -| agi[format] Imports graph -| agr[format] References graph -| agR[format] Global references graph -| agx[format] Cross references graph -| agg[format] Custom graph -| ag- Clear the custom graph -| agn[?] title body Add a node to the custom graph -| age[?] 
title1 title2 Add an edge to the custom graph - -Output formats: -| Ascii art -| * rizin commands -| d Graphviz dot -| g Graph Modelling Language (gml) -| j json ('J' for formatted disassembly) -| k SDB key-value -| t Tiny ascii art -| v Interactive ascii art -| w [path] Write to path or display graph image (see graph.gv.format and graph.web) +Usage: ag # Analysis graph commands +| aga =ascii # Data reference graph +| agA =ascii # Global data references graph +| agc =ascii # Function callgraph +| agC =ascii # Global callgraph +| agCi =ascii # Inter-procedual control flow graph +| agF =ascii # Control flow graph (without calls) +| agf =ascii # Basic blocks function graph +| agi =ascii # Imports graph +| agr =ascii # References graph +| agR =ascii # Global references graph +| ags =ascii # Normal graph +| agl =ascii # Line graph +| agx =ascii # Cross-references graph +| agI =ascii # RzIL graph of the instruction at the current offset. +| agg =ascii # Custom graph +| ag- # Clear the custom graph +| agn[-] # Managing custom graph nodes +| age[-] # Managing custom graph edges +| agw =dataref [-global] # Write to path or display graph image (see graph.gv.format) + +Formats: +| ascii # Ascii art +| cmd # rizin commands +| dot # Graphviz dot +| gml # Graph Modelling Language +| json # json +| json_disasm # json formatted disassembly +| sdb # SDB key-value +| interactive # Interactive ascii art ``` The structure of the commands is as follows: `ag `. -For example, `agid` displays the imports graph in dot format, while `aggj` +For example, `agi dot` displays the imports graph in dot format, while `agg json` outputs the custom graph in JSON format. Here's a short description for every output format available: -### Ascii Art ** (e.g. `agf`) +### Ascii Art (e.g. `agf`) Displays the graph directly to stdout using ASCII art to represent blocks and edges. 
-_Warning: displaying large graphs directly to stdout might prove to be computationally expensive and will make rizin not responsive for some time. In case of doubt, prefer using the interactive view (explained below)._ +_Warning: displaying large graphs directly to stdout might prove to be computationally expensive and will make Rizin +not responsive for some time. In case of doubt, prefer using the interactive view (explained below)._ -### Interactive Ascii Art (e.g. `agfv`) +### Interactive Ascii Art (e.g. `agf interactive`) Displays the ASCII graph in an interactive view similar to `VV` which allows to move the screen, zoom in / zoom out, ... -### Tiny Ascii Art (e.g. `agft`) +### Graphviz dot (e.g. `agf dot`) -Displays the ASCII graph directly to stdout in tiny mode (which is the same as reaching the maximum zoom out level in the interactive view). +Prints the dot source code representing the graph, which can be interpreted by programs such as +[graphviz](https://graphviz.gitlab.io/download/) or online viewers like [webgraphviz](http://www.webgraphviz.com/). -### Graphviz dot (e.g. `agfd`) - -Prints the dot source code representing the graph, which can be interpreted by programs such as [graphviz](https://graphviz.gitlab.io/download/) or online viewers like [this](http://www.webgraphviz.com/) - -### JSON (e.g. `agfj`) +### JSON (e.g. `agf json`) Prints a JSON string representing the graph. -- In the case of the `f` format (basic blocks of function), it will have detailed information about the function and will also contain the disassembly of the function (use `J` format for the formatted disassembly. - +- In the case of the `agf` command (basic blocks function graph), it will have detailed information about the function and + will also contain the disassembly of the function (use the `json_disasm` format for the formatted disassembly). - In all other cases, it will only have basic information about the nodes of the graph (id, title, body, and edges).
-### Graph Modelling Language (e.g. `agfg`) +### Graph Modelling Language (e.g. `agf gml`) -Prints the GML source code representing the graph, which can be interpreted by programs such as [yEd](https://www.yworks.com/products/yed/download) +Prints the GML source code representing the graph, which can be interpreted by programs such as +[yEd](https://www.yworks.com/products/yed/download). -### SDB key-value (e.g. `agfk`) +### SDB key-value (e.g. `agf sdb`) -Prints key-value strings representing the graph that was stored by sdb (rizin's string database). +Prints key-value strings representing the graph that was stored by sdb (Rizin's string database). -### R2 custom graph commands (e.g. `agf*`) +### Rizin custom graph commands (e.g. `agg cmd`) -Prints rizin commands that would recreate the desired graph. The commands to construct the graph are `agn [title] [body]` to add a node and `age [title1] [title2]` to add an edge. +Prints Rizin commands that would recreate the desired graph. The commands to construct the graph are +`agn [title] [body]` to add a node and `age [title1] [title2]` to add an edge. The `[body]` field can be expressed in base64 to include special formatting (such as newlines). -To easily execute the printed commands, it is possible to prepend a dot to the command (`.agf*`). - -### Web / image (e.g. `agfw`) - -Rizin will convert the graph to dot format, use the `dot` program to convert it to a `.gif` image and then try to find an already installed viewer on your system (`xdg-open`, `open`, ...) and display the graph there. +To easily execute the printed commands, it is possible to prepend a dot to the command (`.agg cmd`). -The extension of the output image can be set with the `graph.extension` config variable. Available extensions are `png, jpg, gif, pdf, ps`. +### Web / image (e.g. 
`agw`) -_Note: for particularly large graphs, the most recommended extension is `svg` as it will produce images of much smaller size_ +Rizin will convert the graph to dot format, use the `dot` program to convert it to a `.gif` image and then try to find +an already installed viewer on your system (`xdg-open`, `open`, ...) and display the graph there. -If `graph.web` config variable is enabled, rizin will try to display the graph using the browser (_this feature is experimental and unfinished, and -disabled by default._) +The extension of the output image can be set with the `graph.gv.format` config variable. Available extensions are +`png, jpg, pdf, ps, svg, json`. +_Note: for particularly large graphs, the most recommended extension is `svg` as it will produce images of much +smaller size_ diff --git a/src/analysis/intro.md b/src/analysis/intro.md index 13cc4087..c8d0d417 100644 --- a/src/analysis/intro.md +++ b/src/analysis/intro.md @@ -6,13 +6,12 @@ basic blocks, opcode data, jump targets, cross-references, and much more. These operations are handled by the `a` (analyze) command family: ``` -|Usage: a[abdefFghoprxstc] [...] +[0x00001100]> a? +Usage: a [abdefFghoprxstc] [...] +| a* same as afl*;ah*;ax* | aa[?] analyze all (fcns + bbs) (aa0 to avoid sub renaming) | a8 [hexpairs] analyze bytes -| ab[b] [addr] analyze block at given address -| abb [len] analyze N basic blocks in [len] (section.size by default) -| abt [addr] find paths in the bb function graph from current offset to given address -| ac [cycles] analyze which op could be executed in [cycles] +| ab[?] [addr] analyze block | ad[?] analyze data trampoline (wip) | ad [from] [to] analyze data pointers to (from-to) | ae[?] [expr] analyze opcode eval expression (see ao) @@ -21,6 +20,8 @@ These operations are handled by the `a` (analyze) command family: | ag[?] [options] draw graphs in various formats | ah[?] analysis hints (force opcode size, ...) 
| ai [addr] address information (show perms, stack, heap, ...) +| aj same as a* but in json (aflj) +| aL list all asm/analysis plugins (e asm.arch=?) | an [name] [@addr] show/rename/create whatever flag/function is used at addr | ao[?] [len] analyze Opcodes (or emulate it) | aO[?] [len] Analyze N instructions in M bytes @@ -43,4 +44,3 @@ very different parts of the analysis: - Emulation using ESIL VM - Opcode introspection - Objects information, like virtual tables - diff --git a/src/analysis/symbols.md b/src/analysis/symbols.md index 8f450e8a..a4be6d32 100644 --- a/src/analysis/symbols.md +++ b/src/analysis/symbols.md @@ -16,34 +16,33 @@ In this case, all PDB files are available on the Microsoft server, which is by d is in options. See all pdb options in rizin: ``` -pdb.autoload = 0 -pdb.extract = 1 -pdb.server = https://msdl.microsoft.com/download/symbols -pdb.useragent = Microsoft-Symbol-Server/6.11.0001.402 +[0x00000000]> ell pdb +pdb.autoload = 0 ; Automatically load the required pdb files for loaded DLLs +pdb.extract = 1 ; Avoid extract of the pdb file, just download +pdb.server = https://msdl.microsoft.com/download/symbols ; Semi-colon separated list of base URLs for Microsoft symbol servers +pdb.symstore = /home/user/.local/share/rizin/pdb ; Path to downstream symbol store ``` -Using the variable `pdb.server` you can change the address where rizin will try to +Using the variable `pdb.server` you can change the address where Rizin will try to download the PDB file by the GUID stored in the executable header. -You can make use of multiple symbol servers by separating each URL with a semi-colon: +You can make use of multiple symbol servers by separating each URL with a semicolon: + ``` -e pdb.server=https://msdl.microsoft.com/download/symbols;https://symbols.mozilla.org/ +[0x00000000]> e pdb.server=https://msdl.microsoft.com/download/symbols;https://symbols.mozilla.org/ ``` -On Windows, you can also use local network share paths (UNC paths) as symbol servers. 
-Usually, there is no reason to change the default `pdb.useragent`, but who knows where -could it be handy? +On Windows, you can also use local network share paths (UNC paths) as symbol servers. Because those PDB files are stored as "cab" archives on the server, `pdb.extract=1` says to automatically extract them. Note that for the automatic downloading to work you need the "cabextract" tool, and wget/curl installed. -Sometimes you don't need to do that from the rizin itself, thus - two handy -rz-bin options: +Sometimes you don't need to do that from Rizin itself; for such cases there are two handy rz-bin options: ``` - -P show debug/pdb information - -PP download pdb file for binary + -P Show debug/pdb information + -PP Download pdb file for binary ``` where `-PP` automatically downloads the pdb for the selected binary, using those @@ -55,13 +54,9 @@ manipulated by the `id` commands: ``` [0x000051c0]> id? -|Usage: id Debug information -| Output mode: -| '*' Output in rizin commands -| id Source lines -| idp [file.pdb] Load pdb file information -| idpi [file.pdb] Show pdb file information -| idpd Download pdb file on remote server +Usage: id[jqp] # Debug commands +| id[jq] # Show DWARF source lines information +| idp[jidx] # PDB commands ``` Where `idpi` is basically the same as `rz-bin -P`. @@ -69,8 +64,8 @@ Note that `idp` can be also used not only in the static analysis mode but also in the debugging mode, even if connected via WinDbg. For simplifying the loading PDBs, especially for the processes with many linked DLLs, -rizin can autoload all required PDBs automatically - you need just set the -`e pdb.autoload=true` option. Then if you load some file in debugging mode +Rizin can autoload all required PDBs automatically - you just need to set the +`e pdb.autoload=true` option. Then, if you load some file in debugging mode in Windows, using `rizin -d file.exe` or `rizin -d 2345` (attach to pid 2345), all related PDB files will be loaded automatically.
diff --git a/src/analysis/syscalls.md b/src/analysis/syscalls.md index 6052be77..9c8ad12d 100644 --- a/src/analysis/syscalls.md +++ b/src/analysis/syscalls.md @@ -11,6 +11,7 @@ on the others, these can be different instructions, e.g. `syscall` on x86 PC. 0x00018a0e # 2: svc 0x82 ... ``` + Syscalls detection is driven by `asm.os`, `asm.bits`, and `asm.arch`. Be sure to set those configuration options accordingly. You can use `asl` command to check if syscalls' support is set up properly and as you expect. @@ -34,11 +35,10 @@ the addresses where particular syscalls were found and list them. 0x00018a0e sd_ble_gap_sec_info_reply ... ``` -To reduce searching time it is possible to [restrict the -searching](../search_bytes/configurating_the_search.md) range for -only executable segments or sections with `/as @e:search.in=io.maps.x` +To reduce searching time it is possible to [restrict the searching](../search_bytes/configurating_the_search.md) +range for only executable segments or sections with `/as @e:search.in=io.maps.x` -Using the [ESIL emulation](emulation.md) rizin can print syscall arguments +Using the [ESIL emulation](emulation.md) Rizin can print syscall arguments in the disassembly output. To enable the linear (but very rough) emulation use `asm.emu` configuration variable: ``` @@ -50,8 +50,9 @@ in the disassembly output. To enable the linear (but very rough) emulation use ``` In case of executing `aae` (or `aaaa` which calls `aae`) command -rizin will push found syscalls to a special `syscall.` flagspace, +Rizin will push found syscalls to a special `syscall.` flagspace, which can be useful for automation purpose: + ``` [0x000187c2]> fs 0 0 * imports @@ -77,7 +78,9 @@ It also can be interactively navigated through within HUD mode (`V_`) 0x0002ac36 syscall.sd_ble_gap_disconnect.3 ``` -When debugging in rizin, you can use `dcs` to continue execution until the next syscall. You can also run `dcs*` to trace all syscalls. 
+When debugging in Rizin, you can use `dcs` to continue execution until the next syscall. You can also run `dcs*` to +trace all syscalls. + ``` [0xf7fb9120]> dcs* Running child until syscalls:-1 @@ -90,7 +93,8 @@ child stopped with signal 133 child stopped with signal 133 ``` -rizin also has a syscall name to syscall number utility. You can return the syscall name of a given syscall number or vice versa, without leaving the shell. +Rizin also has a syscall name to syscall number utility. You can return the syscall name of a given syscall number +or vice versa, without leaving the shell. ``` [0x08048436]> asr 1 diff --git a/src/analysis/types.md b/src/analysis/types.md index 27baf581..bfd9e44c 100644 --- a/src/analysis/types.md +++ b/src/analysis/types.md @@ -3,27 +3,27 @@ Rizin supports C-syntax data type definitions. Most of the related commands are located in `t` namespace: ``` -[0x000051c0]> t? +[0x00001100]> t? Usage: t[?] # Types, noreturn, signatures, C parser and more -| t[j*] [] # List all types / Show type information -| t- # Remove the type -| t-* # Remove all types -| tc[dc] # List loaded types in C format -| td # Define type from C definition -| te[jbcdf] # List loaded enums -| tf[j-c?] # List loaded functions definitions -| tn[j-?] # Manage noreturn function attributes and marks -| to[es] # Open C header file and load types from it -| tp[vx] # Print formatted type casted to the address -| ts[jlcd] # List loaded structures -| tt[jc] # List loaded typedefs -| tu[jlcd] # List loaded unions -| tx[fgl] # Type xrefs +| t[j*l] [] # List all types / Show type information +| t- # Remove the type +| t-* # Remove all types +| tc[dc] # List loaded types in C format +| td # Define type from C definition +| te[jbcdf] # List loaded enums +| tf[j-c?] # List loaded functions definitions +| tn[j-?] 
# Manage noreturn function attributes and marks +| to[es] # Open C header file and load types from it +| tp[vx] # Print formatted type casted to the address +| ts[jlcd] # List loaded structures +| tt[jc] # List loaded typedefs +| tu[jlcd] # List loaded unions +| tx[fgl] # Type xrefs ``` Note that the basic (atomic) types are not those from the C standard - not `char`, `_Bool`, or `short`. Because those types can be different -from one platform to another, rizin uses `definite` types like as +from one platform to another, Rizin uses `definite` types such as `int8_t` or `uint64_t` and will convert `int` to `int32_t` or `int64_t` depending on the binary or debuggee platform/compiler. @@ -49,7 +49,7 @@ long long There are three easy ways to define a new type: * Passing a string to the `td` command * Passing a file with the `to ` command -* Using your defined `cfg.editor` by calling the the `to -` command +* Using your defined `cfg.editor` by calling the `to -` command ``` [0x000051c0]> td "struct foo {char* a; int b;}" @@ -135,16 +135,16 @@ Moreover, the link will be shown in the disassembly output or visual mode: 0x00005202 mov rbp, rsp ``` -Once the struct is linked, rizin tries to propagate structure offset in the function at the current +Once the struct is linked, Rizin tries to propagate structure offset in the function at the current offset, to run this analysis on the whole program or at any targeted functions after all structs are linked you have `aat` command: ``` -[0x00000000]> aa? -| aat [] # Analyze all/given function to convert immediate to linked structure offsets +[0x00000000]> aat? 
+Usage: aat [] # Analyze all/given function to convert immediate to linked structure offsets ``` -Note sometimes the emulation may not be accurate, for example as below : +Note sometimes the emulation may not be accurate, for example as below: ```` |0x000006da push rbp @@ -161,8 +161,8 @@ The return value of `malloc` may differ between two emulations, so you have to s value manually using `ahr` command, so run `tl` or `aat` command after setting up the return value hint. ``` -[0x000006da]> ah? -| ahr # Set function return value hint +[0x000006da]> ahr? +Usage: ahr # Set function return value hint ``` ### Structure Immediates diff --git a/src/analysis/variables.md b/src/analysis/variables.md index 9f1d5740..7a928986 100644 --- a/src/analysis/variables.md +++ b/src/analysis/variables.md @@ -7,44 +7,42 @@ configuration option. The main variables commands are located in `afv` namespace: ``` -Usage: afv [rbs] -| afv* output rizin command to add args/locals to flagspace -| afv-([name]) remove all or given var -| afv= list function variables and arguments with disasm refs -| afva analyze function arguments/locals -| afvb[?] manipulate bp based arguments/locals -| afvd name output rizin command for displaying the value of args/locals in the debugger -| afvf show BP relative stackframe variables -| afvn [new_name] ([old_name]) rename argument/local -| afvr[?] manipulate register based arguments -| afvR [varname] list addresses where vars are accessed (READ) -| afvs[?] manipulate sp based arguments/locals -| afvt [name] [new_type] change type for given argument/local -| afvW [varname] list addresses where vars are accessed (WRITE) -| afvx show function variable xrefs (same as afvR+afvW) -``` - -`afvr`, `afvb` and `afvs` commands are uniform but allow manipulation of -register-based arguments and variables, BP/FP-based arguments and variables, -and SP-based arguments and variables respectively. +[0x00001100]> afv? 
+Usage: afv # Manipulate arguments/variables in a function +| afvl[j*lt] # List all variables and arguments of the current function +| afv= # List function variables and arguments with disasm refs +| afv- # Remove all variables/arguments or just the specified one +| afva # Analyze function arguments/locals +| afvd [] # Display the value of arguments/variables +| afvf # Show BP relative stackframe variables +| afvn [] # Rename argument/variable in current function +| afvR [] # List addresses where vars are accessed (READ) +| afvW [] # List addresses where vars are accessed (WRITE) +| afvt # Change type for given argument/local +| afvx[jav] # Show argument/variable xrefs in a function +| afvs[j*-gs?] # Manipulate stack-based arguments/locals +| afvr[j*-gs?] # Manipulate register-based arguments/locals +``` + +`afvr` and `afvs` commands are uniform but allow manipulation of +register-based arguments and variables, and SP-based arguments and variables respectively. If we check the help for `afvr` we will get the way two others commands works too: ``` -|Usage: afvr [reg] [type] [name] -| afvr list register based arguments -| afvr* same as afvr but in rizin commands -| afvr [reg] [name] ([type]) define register arguments -| afvrj return list of register arguments in JSON format -| afvr- [name] delete register arguments at the given index -| afvrg [reg] [addr] define argument get reference -| afvrs [reg] [addr] define argument set reference +[0x00001100]> afvr? +Usage: afvr[j*-gs?] 
# Manipulate register-based arguments/locals +| afvr[j*] [ []] # List register-based arguments and locals / Define a new one +| afvr- # Delete register-based argument/local with the given name +| afvr-* # Delete all register-based arguments/locals +| afvrg # Define register-based arguments and locals get references +| afvrs # Define register-based arguments and locals set references ``` -Like many other things variables detection is performed by rizin automatically, but results +Like many other things variables detection is performed by Rizin automatically, but results can be changed with those arguments/variables control commands. This kind of analysis relies heavily on preloaded function prototypes and the calling-convention, thus loading symbols can improve it. Moreover, after changing something we can rerun variables analysis with -`afva` command. Quite often variables analysis is accompanied with +`afva` command. Quite often variables analysis is accompanied by [types analysis](types.md), see `afta` command. The most important aspect of reverse engineering - naming things. Of course, you can rename @@ -53,33 +51,33 @@ _any_ type of argument or variable. Or you can simply remove the variable or arg `afv-` command. As mentioned before the analysis loop relies heavily on types information while performing -variables analysis stages. 
Thus comes next very important command - `afvt`, which -allows you to change the type of variable: - -``` -[0x00003b92]> afvs -var int local_8h @ rsp+0x8 -var int local_10h @ rsp+0x10 -var int local_28h @ rsp+0x28 -var int local_30h @ rsp+0x30 -var int local_32h @ rsp+0x32 -var int local_38h @ rsp+0x38 -var int local_45h @ rsp+0x45 -var int local_46h @ rsp+0x46 -var int local_47h @ rsp+0x47 -var int local_48h @ rsp+0x48 -[0x00003b92]> afvt local_10h char* -[0x00003b92]> afvs -var int local_8h @ rsp+0x8 -var char* local_10h @ rsp+0x10 -var int local_28h @ rsp+0x28 -var int local_30h @ rsp+0x30 -var int local_32h @ rsp+0x32 -var int local_38h @ rsp+0x38 -var int local_45h @ rsp+0x45 -var int local_46h @ rsp+0x46 -var int local_47h @ rsp+0x47 -var int local_48h @ rsp+0x48 +variables analysis stages. Let's see all variables that were found by Rizin: + +``` +[0x000011e9]> afvs +var unknown_t var_48h @ stack - 0x48 +var unknown_t var_3ch @ stack - 0x3c +var unknown_t var_30h @ stack - 0x30 +var unknown_t var_28h @ stack - 0x28 +var unknown_t var_20h @ stack - 0x20 +var unknown_t var_18h @ stack - 0x18 +var unknown_t var_10h @ stack - 0x10 +``` + +`unknown_t` means an "undefined" default type for a variable whose exact type can't be inferred. Thus comes a very +important command - `afvt`, which allows you to change the type of a variable: + +``` +[0x000011e9]> afvt var_48h const char * + +[0x000011e9]> afvs +var const char *var_48h @ stack - 0x48 +var unknown_t var_3ch @ stack - 0x3c +var unknown_t var_30h @ stack - 0x30 +var unknown_t var_28h @ stack - 0x28 +var unknown_t var_20h @ stack - 0x20 +var unknown_t var_18h @ stack - 0x18 +var unknown_t var_10h @ stack - 0x10 ``` Less commonly used feature, which is still under heavy development - distinction between @@ -88,95 +86,179 @@ being written with `afvW` command. 
Both commands provide a list of the places th are performed: ``` -[0x00003b92]> afvR -local_48h 0x48ee -local_30h 0x3c93,0x520b,0x52ea,0x532c,0x5400,0x3cfb -local_10h 0x4b53,0x5225,0x53bd,0x50cc -local_8h 0x4d40,0x4d99,0x5221,0x53b9,0x50c8,0x4620 -local_28h 0x503a,0x51d8,0x51fa,0x52d3,0x531b -local_38h -local_45h 0x50a1 -local_47h -local_46h -local_32h 0x3cb1 +[0x000011e9]> afvR + var_3ch + var_48h + var_30h 0x1212,0x1254 + var_28h 0x1222,0x1267 + var_20h 0x1232 + var_18h 0x1236 + var_10h 0x124d,0x1258,0x126b,0x127a,0x1286 + [0x00003b92]> afvW -local_48h 0x3adf -local_30h 0x3d3e,0x4868,0x5030 -local_10h 0x3d0e,0x5035 -local_8h 0x3d13,0x4d39,0x5025 -local_28h 0x4d00,0x52dc,0x53af,0x5060,0x507a,0x508b -local_38h 0x486d -local_45h 0x5014,0x5068 -local_47h 0x501b -local_46h 0x5083 -local_32h +[0x000011e9]> afvW + var_3ch 0x11f5 + var_48h 0x11f8 + var_30h 0x1203 + var_28h 0x120e + var_20h 0x121e + var_18h 0x122e + var_10h 0x1249 [0x00003b92]> ``` ## Type inference -The type inference for local variables and arguments is well integrated with the command `afta`. 
- -Let's see an example of this with a simple [hello_world](https://github.com/rizinorg/rizinbook/tree/master/examples/hello_world) binary - -``` -[0x000007aa]> pdf -| ;-- main: -/ (fcn) sym.main 157 -| sym.main (); -| ; var int local_20h @ rbp-0x20 -| ; var int local_1ch @ rbp-0x1c -| ; var int local_18h @ rbp-0x18 -| ; var int local_10h @ rbp-0x10 -| ; var int local_8h @ rbp-0x8 -| ; DATA XREF from entry0 (0x6bd) -| 0x000007aa push rbp -| 0x000007ab mov rbp, rsp -| 0x000007ae sub rsp, 0x20 -| 0x000007b2 lea rax, str.Hello ; 0x8d4 ; "Hello" -| 0x000007b9 mov qword [local_18h], rax -| 0x000007bd lea rax, str.r2_folks ; 0x8da ; " r2-folks" -| 0x000007c4 mov qword [local_10h], rax -| 0x000007c8 mov rax, qword [local_18h] -| 0x000007cc mov rdi, rax -| 0x000007cf call sym.imp.strlen ; size_t strlen(const char *s) -``` - -* After applying `afta` - -``` -[0x000007aa]> afta -[0x000007aa]> pdf -| ;-- main: -| ;-- rip: -/ (fcn) sym.main 157 -| sym.main (); -| ; var size_t local_20h @ rbp-0x20 -| ; var size_t size @ rbp-0x1c -| ; var char *src @ rbp-0x18 -| ; var char *s2 @ rbp-0x10 -| ; var char *dest @ rbp-0x8 -| ; DATA XREF from entry0 (0x6bd) -| 0x000007aa push rbp -| 0x000007ab mov rbp, rsp -| 0x000007ae sub rsp, 0x20 -| 0x000007b2 lea rax, str.Hello ; 0x8d4 ; "Hello" -| 0x000007b9 mov qword [src], rax -| 0x000007bd lea rax, str.r2_folks ; 0x8da ; " r2-folks" -| 0x000007c4 mov qword [s2], rax -| 0x000007c8 mov rax, qword [src] -| 0x000007cc mov rdi, rax ; const char *s -| 0x000007cf call sym.imp.strlen ; size_t strlen(const char *s) -``` - -It also extracts type information from format strings like `printf ("fmt : %s , %u , %d", ...)`, the format specifications are extracted from `analysis/d/spec.sdb` - -You could create a new profile for specifying a set of format chars depending on different libraries/operating systems/programming languages like this : +The type inference for local variables and arguments is well integrated with the command `aft`. 
+ +Let's see an example of this with a simple +[hello_world](https://github.com/rizinorg/book/tree/master/examples/hello_world) binary: + +``` +[0x00001100]> aa +[x] Analyze all flags starting with sym. and entry0 (aa) + +[0x00001100]> s main + +[0x000011e9]> pdf + ; DATA XREF from entry0 @ 0x1118 +/ int main(int argc, char **argv, char **envp); +| ; arg int argc @ rdi +| ; arg char **argv @ rsi +| ; var int64_t var_48h @ stack - 0x48 +| ; var int64_t var_3ch @ stack - 0x3c +| ; var int64_t var_30h @ stack - 0x30 +| ; var int64_t var_28h @ stack - 0x28 +| ; var int64_t var_20h @ stack - 0x20 +| ; var int64_t var_18h @ stack - 0x18 +| ; var int64_t var_10h @ stack - 0x10 +| 0x000011e9 endbr64 +| 0x000011ed push rbp +| 0x000011ee mov rbp, rsp +| 0x000011f1 sub rsp, 0x40 +| 0x000011f5 mov dword [var_3ch], edi ; argc +| 0x000011f8 mov qword [var_48h], rsi ; argv +| 0x000011fc lea rax, [str.Hello] ; 0x2004 ; "Hello " +| 0x00001203 mov qword [var_30h], rax +| 0x00001207 lea rax, [str.world] ; 0x200b ; "world!" 
+| 0x0000120e mov qword [var_28h], rax +| 0x00001212 mov rax, qword [var_30h] +| 0x00001216 mov rdi, rax +| 0x00001219 call sym.imp.strlen ; sym.imp.strlen ; size_t strlen(const char *s) +| 0x0000121e mov qword [var_20h], rax +| 0x00001222 mov rax, qword [var_28h] +| 0x00001226 mov rdi, rax +| 0x00001229 call sym.imp.strlen ; sym.imp.strlen ; size_t strlen(const char *s) +| 0x0000122e mov qword [var_18h], rax +| 0x00001232 mov rdx, qword [var_20h] +| 0x00001236 mov rax, qword [var_18h] +| 0x0000123a add rax, rdx +| 0x0000123d add rax, 1 +| 0x00001241 mov rdi, rax +| 0x00001244 call sym.imp.malloc ; sym.imp.malloc ; void *malloc(size_t size) +| 0x00001249 mov qword [var_10h], rax +| 0x0000124d cmp qword [var_10h], 0 +| ,=< 0x00001252 je 0x1292 +| | 0x00001254 mov rdx, qword [var_30h] +| | 0x00001258 mov rax, qword [var_10h] +| | 0x0000125c mov rsi, rdx +| | 0x0000125f mov rdi, rax +| | 0x00001262 call sym.imp.strcpy ; sym.imp.strcpy ; char *strcpy(char *dest, const char *src) +| | 0x00001267 mov rdx, qword [var_28h] +| | 0x0000126b mov rax, qword [var_10h] +| | 0x0000126f mov rsi, rdx +| | 0x00001272 mov rdi, rax +| | 0x00001275 call sym.imp.strcat ; sym.imp.strcat ; char *strcat(char *s1, const char *s2) +| | 0x0000127a mov rax, qword [var_10h] +| | 0x0000127e mov rdi, rax +| | 0x00001281 call sym.imp.puts ; sym.imp.puts ; int puts(const char *s) +| | 0x00001286 mov rax, qword [var_10h] +| | 0x0000128a mov rdi, rax +| | 0x0000128d call sym.imp.free ; sym.imp.free ; void free(void *ptr) +| `-> 0x00001292 mov eax, 0 +| 0x00001297 leave +\ 0x00001298 ret +``` + +After applying `aft`: + +``` +[0x000011e9]> aeim + +[0x000011e9]> aft + +[0x000011e9]> pdf + ; DATA XREF from entry0 @ 0x1118 + ;-- rip: +/ int main(int argc, char **argv, char **envp); +| ; arg int argc @ rdi +| ; arg char **argv @ rsi +| ; var char **var_48h @ stack - 0x48 +| ; var int var_3ch @ stack - 0x3c +| ; var const char *src @ stack - 0x30 +| ; var const char *s2 @ stack - 0x28 +| ; var size_t 
var_20h @ stack - 0x20 +| ; var size_t size @ stack - 0x18 +| ; var char *dest @ stack - 0x10 +| 0x000011e9 endbr64 +| 0x000011ed push rbp +| 0x000011ee mov rbp, rsp +| 0x000011f1 sub rsp, 0x40 +| 0x000011f5 mov dword [var_3ch], edi ; argc +| 0x000011f8 mov qword [var_48h], rsi ; argv +| 0x000011fc lea rax, [str.Hello] ; 0x2004 ; "Hello " +| 0x00001203 mov qword [src], rax +| 0x00001207 lea rax, [str.world] ; 0x200b ; "world!" +| 0x0000120e mov qword [s2], rax +| 0x00001212 mov rax, qword [src] +| 0x00001216 mov rdi, rax ; const char *s +| 0x00001219 call sym.imp.strlen ; sym.imp.strlen ; size_t strlen(const char *s) +| 0x0000121e mov qword [var_20h], rax +| 0x00001222 mov rax, qword [s2] +| 0x00001226 mov rdi, rax ; const char *s +| 0x00001229 call sym.imp.strlen ; sym.imp.strlen ; size_t strlen(const char *s) +| 0x0000122e mov qword [size], rax +| 0x00001232 mov rdx, qword [var_20h] +| 0x00001236 mov rax, qword [size] +| 0x0000123a add rax, rdx +| 0x0000123d add rax, 1 +| 0x00001241 mov rdi, rax ; size_t size +| 0x00001244 call sym.imp.malloc ; sym.imp.malloc ; void *malloc(size_t size) +| 0x00001249 mov qword [dest], rax +| 0x0000124d cmp qword [dest], 0 +| ,=< 0x00001252 je 0x1292 +| | 0x00001254 mov rdx, qword [src] +| | 0x00001258 mov rax, qword [dest] +| | 0x0000125c mov rsi, rdx ; const char *src +| | 0x0000125f mov rdi, rax ; char *dest +| | 0x00001262 call sym.imp.strcpy ; sym.imp.strcpy ; char *strcpy(char *dest, const char *src) +| | 0x00001267 mov rdx, qword [s2] +| | 0x0000126b mov rax, qword [dest] +| | 0x0000126f mov rsi, rdx ; const char *s2 +| | 0x00001272 mov rdi, rax ; char *s1 +| | 0x00001275 call sym.imp.strcat ; sym.imp.strcat ; char *strcat(char *s1, const char *s2) +| | 0x0000127a mov rax, qword [dest] +| | 0x0000127e mov rdi, rax ; const char *s +| | 0x00001281 call sym.imp.puts ; sym.imp.puts ; int puts(const char *s) +| | 0x00001286 mov rax, qword [dest] +| | 0x0000128a mov rdi, rax ; void *ptr +| | 0x0000128d call sym.imp.free ; 
sym.imp.free ; void free(void *ptr) +| `-> 0x00001292 mov eax, 0 +| 0x00001297 leave +\ 0x00001298 ret +``` + +It also extracts type information from format strings like `printf ("fmt : %s , %u , %d", ...)`, +the format specifications are extracted from `analysis/d/spec.sdb` + +You could create a new profile for specifying a set of format chars depending on different libraries/operating +systems/programming languages like this: ``` win=spec spec.win.u32=unsigned int ``` + Then change your default specification to a newly created one using this config variable `e analysis.spec=win` -For more information about primitive and user-defined types support in rizin refer to [types](types.md) chapter. +For more information about primitive and user-defined types support in Rizin refer to [types](types.md) chapter. diff --git a/src/analysis/vtables.md b/src/analysis/vtables.md index 5e0ab19c..8533647e 100644 --- a/src/analysis/vtables.md +++ b/src/analysis/vtables.md @@ -9,16 +9,17 @@ Currently, the support is very basic, allowing you only to inspect parsed tables. ``` -|Usage: av[?jr*] C++ vtables and RTTI -| av search for vtables in data sections and show results -| avj like av, but as json -| av* like av, but as rizin commands -| avr[j@addr] try to parse RTTI at vtable addr (see analysis.cpp.abi) -| avra[j] search for vtables and try to parse RTTI at each of them +Usage: av[j*gr?] # C++ vtables and RTTI +| av[j*] # search for vtables in data sections and show results +| avg[?] # Global variables +| avr[j] # try to parse RTTI at vtable addr (see analysis.cpp.abi) +| avra[j] # search for vtables and try to parse RTTI at each of them +| avrr # recover class info from all findable RTTI (see ac) +| avrD # demangle a class name from RTTI ``` The main commands here are `av` and `avr`. `av` lists all virtual tables -found when rizin opened the file. If you are not happy with the result +found when Rizin opened the file. 
If you are not happy with the result you may want to try to parse a virtual table at a particular address with `avr` command. `avra` performs the search and parsing of all virtual -tables in the binary, like rizin does during the file opening. +tables in the binary, like Rizin does during the file opening. diff --git a/src/basic_commands/block_size.md b/src/basic_commands/block_size.md index b6191603..f7256aff 100644 --- a/src/basic_commands/block_size.md +++ b/src/basic_commands/block_size.md @@ -1,19 +1,16 @@ ## Block Size -The block size determines how many bytes rizin commands will process when not given an explicit size argument. You can temporarily change the block size by specifying a numeric argument to the print commands. For example `px 20`. +The block size determines how many bytes rizin commands will process when not given an explicit size argument. +You can temporarily change the block size by specifying a numeric argument to the print commands. For example `px 20`. ``` [0x00000000]> b? -Usage: b[f] [arg] # Get/Set block size -| b 33 set block size to 33 -| b eip+4 numeric argument can be an expression -| b display current block size -| b+3 increase blocksize by 3 -| b-16 decrease blocksize by 16 -| b* display current block size in rizin command -| bf foo set block size to flag size -| bj display block size information in JSON -| bm 1M set max block size +Usage: b[j*-+fm] # Display or change the block size +| b[j*] [] # Set/Get current block size +| b- # Decrease current block size +| b+ # Increase current block size +| bf # Set block size to flag size +| bm [] # Set/Get max block size ``` The `b` command is used to change the block size: @@ -24,7 +21,9 @@ The `b` command is used to change the block size: [0x00000000]> b-32 # ... = 0xf0 ``` -The `bf` command is used to change the block size to value specified by a flag. For example, in symbols, the block size of the flag represents the size of the function. 
To make that work, you have to either run function analysis `af` (which is included in `aa`) or manually seek and define some functions e.g. via `Vd`. +The `bf` command is used to change the block size to the value specified by a flag. For example, in symbols, the block size +of the flag represents the size of the function. To make that work, you have to either run function analysis `af` +(which is included in `aa`) or manually seek and define some functions e.g. via `Vd`. ``` [0x00000000]> bf sym.main # block size = sizeof(sym.main) @@ -37,7 +36,8 @@ You can combine two operations in a single `pdf` command. Except that `pdf` neit [0x00000000]> pdf @ sym.main # disassemble sym.main ``` -Another way around is to use special variables `$FB` and `$FS` which denote Function's Beginning and Size at the current seek. Read more about [Usable variables](../refcard/intro.md#usable-variables-in-expression). +Another way around is to use special variables `$FB` and `$FS` which denote Function's Beginning and Size at the +current seek. Read more about [Usable variables](../refcard/intro.md#usable-variables-in-expression). 
``` [0x00000000]> s sym.main + 0x04 diff --git a/src/basic_commands/comparing_bytes.md b/src/basic_commands/comparing_bytes.md index f0f72289..8b974435 100644 --- a/src/basic_commands/comparing_bytes.md +++ b/src/basic_commands/comparing_bytes.md @@ -1,52 +1,46 @@ ## Comparing Bytes -For most generic reverse engineering tasks like finding the differences between two binary files, which bytes has changed, find differences in the graphs of the code analysis results, and other diffing operations you can just use rz-diff: +For most generic reverse engineering tasks like finding the differences between two binary files, which bytes have +changed, finding differences in the graphs of the code analysis results, and other diffing operations you can just use +rz-diff: ``` $ rz-diff -h ``` -Inside rizin, the functionalities exposed by rz-diff are available with the `c` command. +Inside Rizin, the functionalities exposed by rz-diff are available with the `c` command. -`c` (short for "compare") allows you to compare arrays of bytes from different sources. The command accepts input in a number of formats and then compares it against values found at current seek position. +`c` (short for "compare") allows you to compare arrays of bytes from different sources. The command accepts input +in a number of formats and then compares it against values found at current seek position. ``` -[0x00404888]> c? 
-Usage: c[?dfx] [argument] # Compare -| c [string] Compare a plain with escaped chars string -| c* [string] Same as above, but printing rizin commands instead -| c1 [addr] Compare 8 bits from current offset -| c2 [value] Compare a word from a math expression -| c4 [value] Compare a doubleword from a math expression -| c8 [value] Compare a quadword from a math expression -| cat [file] Show contents of file (see pwd, ls) -| cc [at] Compares in two hexdump columns of block size -| ccc [at] Same as above, but only showing different lines -| ccd [at] Compares in two disasm columns of block size -| cd [dir] chdir -| cf [file] Compare contents of file at current seek -| cg[?] [o] [file] Graphdiff current file and [file] -| cl|cls|clear Clear screen, (clear0 to goto 0, 0 only) -| cu[?] [addr] @at Compare memory hexdumps of $$ and dst in unified diff -| cud [addr] @at Unified diff disasm from $$ and given address -| cv[1248] [hexpairs] @at Compare 1,2,4,8-byte (silent return in $?) -| cV[1248] [addr] @at Compare 1,2,4,8-byte address contents (silent, return in $?) -| cw[?] [us?] [...] Compare memory watchers -| cx [hexpair] Compare hexpair string (use '.' as nibble wildcard) -| cx* [hexpair] Compare hexpair string (output rizin commands) -| cX [addr] Like 'cc' but using hexdiff output +[0x00001100]> c? +Usage: c[?] 
# Compare block with given data +| c[j] # Compare an escaped with data at current offset +| c1 # Compare 8-bit data at current offset with the data at +| ca[j] # Compare bytes of data at with the data at current offset +| cb[j] # Compare (up to 8) bytes at current offset with a number +| cc # Compare hexdump of data of block size at with the data at current offset +| ccc # Show different lines between hexdump of a block of data at wth the data at current offset +| ccd [] # Compare disassembly of block size at and at the current offset +| cf[j] # Compare the contents of with the data at current offset +| cu[1248d] # Unified diff commands +| cw[lrux] # Compare watcher commands +| cx[j] # Compare data at current offset with a hexpair string (also return in $?) +| cX[j] # Compare hexdump of data of block size at with the data at current offset using hexdiff output ``` To compare memory contents at current seek position against a given string of values, use `cx`: ``` -[0x08048000]> p8 4 -7f 45 4c 46 +[0x00001100]> p8 4 +7f454c46 -[0x08048000]> cx 7f 45 90 46 +[0x00001100]> cx 7f 45 90 46 Compare 3/4 equal bytes -0x00000002 (byte=03) 90 ' ' -> 4c 'L' -[0x08048000]> +0x00001102 (byte=03) 90 ' ' -> 4c 'L' +Compare 3/4 equal bytes (75%) +[0x00001100]> ``` Another subcommand of the `c` command is `cc` which stands for "compare code". 
@@ -62,18 +56,7 @@ To compare contents of two functions specified by their names: [0x08049A80]> cc sym.main2 @ sym.main ``` -`c8` compares a quadword from the current seek (in the example below, 0x00000000) against a math expression: - -``` -[0x00000000]> c8 4 - -Compare 1/8 equal bytes (0%) -0x00000000 (byte=01) 7f ' ' -> 04 ' ' -0x00000001 (byte=02) 45 'E' -> 00 ' ' -0x00000002 (byte=03) 4c 'L' -> 00 ' ' -``` - -The number parameter can, of course, be math expressions which use flag names and anything allowed in an expression: +The number parameter can be math expressions which use flag names and anything allowed in an expression: ``` [0x00000000]> cx 7f469046 @@ -86,8 +69,8 @@ Compare 2/4 equal bytes You can use the compare command to find differences between a current block and a file previously dumped to a disk: ``` -rizin /bin/true -[0x08049A80]> s 0 -[0x08048000]> cf /bin/true -Compare 512/512 equal bytes +rizin /usr/bin/true +[0x00002680]> s 0 +[0x00000000]> cf /usr/bin/true +Compare 256/256 equal bytes (100%) ``` diff --git a/src/basic_commands/dietline.md b/src/basic_commands/dietline.md index 8c783c69..4fb6987f 100644 --- a/src/basic_commands/dietline.md +++ b/src/basic_commands/dietline.md @@ -1,6 +1,7 @@ # Dietline -Rizin comes with the lean [readline](https://en.wikipedia.org/wiki/GNU_Readline)-like input capability through the lean library to handle the command edition and history navigation. It allows users to perform cursor movements, search the +Rizin comes with the lean [readline](https://en.wikipedia.org/wiki/GNU_Readline)-like input capability through the lean +library to handle the command edition and history navigation. It allows users to perform cursor movements, search the history, and implements autocompletion. Moreover, due to the rizin portability, dietline provides the uniform experience among all supported platforms. It is used in all rizin subshells - main prompt, SDB shell, visual prompt, and offsets prompt. 
It also implements the most common features @@ -20,7 +21,7 @@ the `true` (as such: `e scr.prompt.popup=true`). # Emacs (default) mode -By default dietline mode is compatible with readline Emacs-like mode key bindings. Thus active are: +By default, dietline mode is compatible with readline Emacs-like mode key bindings. Thus, active are: ## Moving - `Ctrl-a` - move to the beginning of the line @@ -38,7 +39,8 @@ By default dietline mode is compatible with readline Emacs-like mode key binding ## Killing and Yanking - `Ctrl-k` - kill the text from point to the end of the line. - `Ctrl-x` - kill backward from the cursor to the beginning of the current line. -- `Ctrl-t` - kill from point to the end of the current word, or if between words, to the end of the next word. Word boundaries are the same as forward-word. +- `Ctrl-t` - kill from point to the end of the current word, or if between words, to the end of the next word. + Word boundaries are the same as forward-word. - `Ctrl-w` - kill the word behind point, using white space as a word boundary. The killed text is saved on the kill-ring. - `Ctrl-y` - yank the top of the kill ring into the buffer at point. - `Ctrl-]` - rotate the kill-ring, and yank the new top. You can only do this if the prior command is yank or yank-pop. @@ -48,7 +50,8 @@ By default dietline mode is compatible with readline Emacs-like mode key binding # Vi mode -Rizin also comes with in vi mode that can be enabled by toggling `e scr.prompt.vi=true`). The various keybindings available in this mode are: +Rizin also comes with a vi mode that can be enabled by toggling `e scr.prompt.vi=true`. The various keybindings +available in this mode are: ## Entering command modes - `ESC` - enter into the control mode @@ -76,8 +79,11 @@ Rizin also comes with in vi mode that can be enabled by toggling `e scr.prompt.v - `dl` - delete a character to the right - `d$` - kill the text from point to the end of the line. 
- `d^` - kill backward from the cursor to the beginning of the current line. -- `de` - kill from point to the end of the current word, or if between words, to the end of the next word. Word boundaries are the same as forward-word. +- `de` - kill from point to the end of the current word, or if between words, to the end of the next word. + Word boundaries are the same as forward-word. - `p` - yank the top of the kill ring into the buffer at point. -- `c` - acts similar to d based commands, but goes into insert mode in the end by prefixing the commands with numbers, the command is performed multiple times. +- `c` - acts similar to d based commands, but goes into insert mode in the end by prefixing the commands with numbers, + the command is performed multiple times. -If you are finding it hard to keep track of which mode you are in, just enter `e scr.prompt.mode=true` to update the color of the prompt based on the vi-mode. \ No newline at end of file +If you are finding it hard to keep track of which mode you are in, just enter `e scr.prompt.mode=true` to update +the color of the prompt based on the vi-mode. diff --git a/src/basic_commands/flags.md b/src/basic_commands/flags.md index 68afd96d..3906e54b 100644 --- a/src/basic_commands/flags.md +++ b/src/basic_commands/flags.md @@ -1,6 +1,8 @@ ## Flags -Flags are conceptually similar to bookmarks. They associate a name with a given offset in a file. Flags can be grouped into 'flagspaces'. A flagspace is a namespace for flags, grouping together flags of similar characteristics or type. Examples for flagspaces: sections, registers or symbols. +Flags are conceptually similar to bookmarks. They associate a name with a given offset in a file. Flags can be grouped +into 'flagspaces'. A flagspace is a namespace for flags, grouping together flags of similar characteristics or type. +Examples for flagspaces: sections, registers or symbols. 
To create a flag: @@ -8,7 +10,8 @@ To create a flag: [0x100003ba0]> f flag_name @ offset ``` -You can remove a flag by appending the `-` character to command. Most commands accept `-` as argument-prefix as an indication to delete something. +You can remove a flag by appending the `-` character to the command. Most commands accept `-` as argument-prefix as +an indication to delete something. ``` [0x100003ba0]> f-flag_name @@ -70,8 +73,9 @@ You can rename flags with `fr`. ### Local flags -Every flag name should be unique for addressing reasons. But it is quite a common need -to have the flags, for example inside the functions, with simple and ubiquitous names like `loop` or `return`. For this purpose you can use so called "local" flags, which are tied to the function where they reside. It is possible to add them using `f.` command: +Every flag name should be unique for addressing reasons. But it is quite a common need to have the flags, for example +inside the functions, with simple and ubiquitous names like `loop` or `return`. For this purpose you can use so-called +"local" flags, which are tied to the function where they reside. It is possible to add them using the `f.` command: ``` [0x00003a04]> pd 10 @@ -110,12 +114,14 @@ to have the flags, for example inside the functions, with simple and ubiquitous ### Flag Zones -Rizin offers flag zones, which lets you label different offsets on the scrollbar, for making it easier to navigate through large binaries. You can set a flag zone on the current seek using: +Rizin offers flag zones, which let you label different offsets on the scrollbar, for making it easier to navigate +through large binaries. You can set a flag zone on the current seek using: ``` [0x00003a04]> fz flag-zone-name ``` -Set `e scr.scrollbar=1` and go to the Visual mode, to see your flag zone appear on the scrollbar on the right end of the window. 
+Set `e scr.scrollbar=1` and go to the Visual mode, to see your flag zone appear on the scrollbar on the right end of +the window. See `fz?` for more information. \ No newline at end of file diff --git a/src/basic_commands/intro.md b/src/basic_commands/intro.md index 663288f9..08122c0b 100644 --- a/src/basic_commands/intro.md +++ b/src/basic_commands/intro.md @@ -1,8 +1,12 @@ # Basic Commands -Most command names in rizin are derived from action names. They should be easy to remember, as they are short. Actually, all commands are single letters. Subcommands or related commands are specified using the second character of the command name. For example, `/ foo` is a command to search plain string, while `/x 90 90` is used to look for hexadecimal pairs. +Most command names in rizin are derived from action names. They should be easy to remember, as they are short. +Actually, all commands are single letters. Subcommands or related commands are specified using the second character +of the command name. For example, `/ foo` is a command to search plain string, while `/x 90 90` is used to look for +hexadecimal pairs. -The general format for a valid command (as explained in the [Rizin Command-line](../first_steps/commandline_rizin.md) chapter) looks like this: +The general format for a valid command (as explained in the [Rizin Command-line](../first_steps/commandline_rizin.md) +chapter) looks like this: ``` [.][times][cmd][~grep][@[@iter]addr!size][|>pipe] ; ... @@ -14,20 +18,24 @@ For example, > 3s +1024 ; seeks three times 1024 from the current seek ``` -If a command starts with `R!`, the rest of the string is passed to the currently loaded IO plugin (a debugger, for example). Most plugins provide help messages with `R!?` or `R!help`. +If a command starts with `R!`, the rest of the string is passed to the currently loaded IO plugin (a debugger, +for example). Most plugins provide help messages with `R!?` or `R!help`. 
``` $ rizin -d /bin/ls > R!help ; handled by the IO plugin ``` -If a command starts with `!`, posix_system() is called to pass the command to your shell. Check `!?` for more options and usage examples. +If a command starts with `!`, posix_system() is called to pass the command to your shell. Check `!?` for more options +and usage examples. ``` > !ls ; run `ls` in the shell ``` -The meaning of the arguments (iter, addr, size) depends on the specific command. As a rule of thumb, most commands take a number as an argument to specify the number of bytes to work with, instead of the currently defined block size. Some commands accept math expressions or strings. +The meaning of the arguments (iter, addr, size) depends on the specific command. As a rule of thumb, most commands take +a number as an argument to specify the number of bytes to work with, instead of the currently defined block size. +Some commands accept math expressions or strings. ``` > px 0x17 ; show 0x17 bytes in hexs at current seek @@ -35,14 +43,16 @@ The meaning of the arguments (iter, addr, size) depends on the specific command. > / lib ; search for 'lib' string. ``` -The `@` sign is used to specify a temporary offset location or a seek position at which the command is executed, instead of current seek position. This is quite useful as you don't have to seek around all the time. +The `@` sign is used to specify a temporary offset location or a seek position at which the command is executed, +instead of current seek position. This is quite useful as you don't have to seek around all the time. ``` > p8 10 @ 0x4010 ; show 10 bytes at offset 0x4010 > f patata @ 0x10 ; set 'patata' flag at offset 0x10 ``` -Using `@@` sub-commands you can execute a single command on a list of flags, functions, symbols, etc.. You can think of these as foreach operations: +Using `@@` sub-commands you can execute a single command on a list of flags, functions, symbols, etc. 
You can think +of these as foreach operations: ``` > s 0 @@ -57,7 +67,8 @@ The `>` operation is used to redirect the output of a command into a file (overw > f > flags.txt ; dump flag list to 'flags.txt' ``` -The `|` operation (pipe) is similar to what you are used to expect from it in a *NIX shell: an output of one command as input to another. +The `|` operation (pipe) is similar to what you are used to expect from it in a *NIX shell: an output of one command +as input to another. ``` [0x4A13B8C0]> f | grep section | grep text diff --git a/src/basic_commands/mapping_files.md b/src/basic_commands/mapping_files.md index 973ec2e8..435cd766 100644 --- a/src/basic_commands/mapping_files.md +++ b/src/basic_commands/mapping_files.md @@ -1,14 +1,20 @@ ## Mapping Files -Rizin's I/O subsystem allows you to map the contents of files into the same I/O space used to contain a loaded binary. New contents can be placed at random offsets. +Rizin's I/O subsystem allows you to map the contents of files into the same I/O space used to contain a loaded binary. +New contents can be placed at random offsets. -The `o` command permits the user to open a file, this is mapped at offset 0 unless it has a known binary header and then the maps are created in virtual addresses. +The `o` command permits the user to open a file, this is mapped at offset 0 unless it has a known binary header and +then the maps are created in virtual addresses. Sometimes, we want to rebase a binary, or maybe we want to load or map the file in a different address. -When launching rizin, the base address can be changed with the `-B` flag. But you must notice the difference when opening files with unknown headers, like bootloaders, so we need to map them using the `-m` flag (or specifying it as argument to the `o` command). +When launching rizin, the base address can be changed with the `-B` flag. 
But you must notice the difference when +opening files with unknown headers, like bootloaders, so we need to map them using the `-m` flag (or specifying it as +argument to the `o` command). -rizin is able to open files and map portions of them at random places in memory specifying attributes like permissions and name. It is the perfect basic tooling to reproduce an environment like a core file, a debug session, by also loading and mapping all the libraries the binary depends on. +Rizin is able to open files and map portions of them at random places in memory specifying attributes like permissions +and name. It is the perfect basic tooling to reproduce an environment like a core file, a debug session, by also +loading and mapping all the libraries the binary depends on. Opening files (and mapping them) is done using the `o` (open) command. Let's read the help: @@ -38,32 +44,37 @@ Usage: o[?] # Open files and handle opened files Prepare a simple layout: ```bash -$ rz-bin -l /bin/ls -[Linked libraries] -libselinux.so.1 -librt.so.1 -libacl.so.1 +$ rz-bin -l hello_world +[Libs] +library +---------- libc.so.6 - -4 libraries ``` Map a file: ``` -[0x00001190]> o /bin/zsh 0x499999 +[0x00001100]> o /bin/sh 0x499999 ``` List mapped files: ``` [0x00000000]> ol -- 6 /bin/ls @ 0x0 ; r -- 10 /lib/ld-linux.so.2 @ 0x100000000 ; r -- 14 /bin/zsh @ 0x499999 ; r + 3 - r-x 0x00003d48 /home/user/playground/book/examples/hello_world/hello_world + 4 - r-x 0x00000070 vfile://0/reloc-targets + 5 - rw- 0x00000008 null://8 + 6 - r-x 0x00003d48 vfile://0/patched + 7 * r-x 0x000d5b68 /bin/sh + 8 - r-- 0x00000bf0 vfile://1/reloc-targets + 9 - rw- 0x0000ed7c null://60796 +10 - r-- 0x000d5b68 vfile://1/patched ``` -Print hexadecimal values from /bin/zsh: +_Note: `vfile` is a virtual file, that is often automatically created to patch relocations and could also be +created manually, if needed. 
It was created to avoid modifying the original file/IO ranges._ + +Print hexadecimal values from /bin/sh: ``` [0x00000000]> px @ 0x499999 @@ -72,7 +83,7 @@ Print hexadecimal values from /bin/zsh: Unmap files using the `o-` command. Pass the required file descriptor to it as an argument: ``` -[0x00000000]> o- 14 +[0x00000000]> o- 7 ``` You can also view the ascii table showing the list of the opened files: diff --git a/src/basic_commands/print_modes.md b/src/basic_commands/print_modes.md index f574adc0..d6cfa406 100644 --- a/src/basic_commands/print_modes.md +++ b/src/basic_commands/print_modes.md @@ -1,55 +1,55 @@ ## Print Modes -One of the key features of rizin is displaying information in many formats. The goal is to offer a selection of display choices to interpret binary data in the best possible way. +One of the key features of Rizin is displaying information in many formats. The goal is to offer a selection of display +choices to interpret binary data in the best possible way. -Binary data can be represented as integers, shorts, longs, floats, timestamps, hexpair strings, or more complex formats like C structures, disassembly listings, decompilation listing, be a result of an external processing... +Binary data can be represented as integers, shorts, longs, floats, timestamps, hexpair strings, or more complex formats +like C structures, disassembly listings, decompilation listing, be a result of an external processing... Below is a list of available print modes listed by `p?`: ``` -[0x00005310]> p? -|Usage: p[=68abcdDfiImrstuxz] [arg|len] [@addr] +[0x00001100]> p? +Usage: p[=68abcdDfiImrstuxz] [arg|len] [@addr] | p[b|B|xb] [len] ([S]) bindump N bits skipping S bytes -| p[iI][df] [len] print N ops/bytes (f=func) (see pi? and pdi) +| p[iI][df] [len] print N ops/bytes (f=func) (see pi? 
and pdq) | p[kK] [len] print key in randomart (K is for mosaic) -| p-[?][jh] [mode] bar|json|histogram blocks (mode: el search.in) +| p-[?][jh] [mode] bar|json|histogram blocks (mode: e?search.in) | p2 [len] 8x8 2bpp-tiles -| p3 [file] print stereogram (3D) | p6[de] [len] base64 decode/encode | p8[?][j] [len] 8bit hexpair list of bytes | p=[?][bep] [N] [L] [b] show entropy/printable chars/chars bars | pa[edD] [arg] pa:assemble pa[dD]:disasm or pae: esil from hex -| pA[n_ops] show n_ops address and type | pb[?] [n] bitstream of N bits | pB[?] [n] bitstream of N bytes | pc[?][p] [len] output C (or python) format -| pC[aAcdDxw] [rows] print disassembly in columns (see hex.cols and pdi) +| pC[aAcdDxw] [rows] print disassembly in columns (see hex.cols and pdq) | pd[?] [sz] [a] [b] disassemble N opcodes (pd) or N bytes (pD) | pf[?][.nam] [fmt] print formatted data (pf.name, pf.name $) | pF[?][apx] print asn1, pkcs7 or x509 | pg[?][x y w h] [cmd] create new visual gadget or print it (see pg? for details) | ph[?][=|hash] ([len]) calculate hash for a block +| pi[?][bdefrj] [num] print instructions +| pI[?][iI][df] [len] print N instructions/bytes (f=func) | pj[?] [len] print as indented JSON | pm[?] [magic] print libmagic data (see pm? and /m?) | po[?] hex print operation applied to block (see po?) | pp[?][sz] [len] print patterns, see pp? 
for more help -| pq[?][is] [len] print QR code with the first Nbytes | pr[?][glx] [len] print N raw bytes (in lines or hexblocks, 'g'unzip) | ps[?][pwz] [len] print pascal/wide/zero-terminated strings | pt[?][dn] [len] print different timestamps | pu[?][w] [len] print N url encoded bytes (w=wide) | pv[?][jh] [mode] show variable/pointer/value in memory -| pwd display current working directory | px[?][owq] [len] hexdump of N bytes (o=octal, w=32bit, q=64bit) -[0x00005310]> +| plf print the RzIL output of the function ``` Tip: when using json output, you can append the `~{}` to the command to get a pretty-printed version of the output: ``` -[0x00000000]> oj +[0x00000000]> olj [{"raised":false,"fd":563280,"uri":"malloc://512","from":0,"writable":true,"size":512,"overlaps":false}] -[0x00000000]> oj~{} +[0x00000000]> olj~{} [ { "raised": false, @@ -63,28 +63,58 @@ Tip: when using json output, you can append the `~{}` to the command to get a pr ] ``` -For more on the magical powers of `~` see the help in `?@?`, and the [Rizin Command-line](../first_steps/commandline_rizin.md) chapter earlier in the book. +For more on the magical powers of `~` see the help in `?@?`, and the [Rizin Command-line](../first_steps/commandline_rizin.md) +chapter earlier in the book. ### Hexadecimal View `px` gives a user-friendly output showing 16 pairs of numbers per row with offsets and raw representations: -![hexprint](print_modes_px.png) +``` +[0x00001100]> px +- offset - 0 1 2 3 4 5 6 7 8 9 A B C D E F 0123456789ABCDEF +0x00001100 f30f 1efa 31ed 4989 d15e 4889 e248 83e4 ....1.I..^H..H.. +0x00001110 f050 5445 31c0 31c9 488d 3dca 0000 00ff .PTE1.1.H.=..... +0x00001120 15b3 2e00 00f4 662e 0f1f 8400 0000 0000 ......f......... +``` #### Show Hexadecimal Words Dump (32 bits) -![wordprint](print_modes_pxw.png) +``` +[0x00001100]> pxw +0x00001100 0xfa1e0ff3 0x8949ed31 0x89485ed1 0xe48348e2 ....1.I..^H..H.. +0x00001110 0x455450f0 0xc931c031 0xca3d8d48 0xff000000 .PTE1.1.H.=..... 
+0x00001120 0x002eb315 0x2e66f400 0x00841f0f 0x00000000 ......f......... + +[0x00001100]> e cfg.bigendian +false + +[0x00001100]> e cfg.bigendian=true + +[0x00001100]> pxw +0x00001100 0xf30f1efa 0x31ed4989 0xd15e4889 0xe24883e4 ....1.I..^H..H.. +0x00001110 0xf0505445 0x31c031c9 0x488d3dca 0x000000ff .PTE1.1.H.=..... +0x00001120 0x15b32e00 0x00f4662e 0x0f1f8400 0x00000000 ......f......... + +[0x00001100]> e cfg.bigendian=false +``` #### 8 bits Hexpair List of Bytes ``` -[0x00404888]> p8 16 -31ed4989d15e4889e24883e4f0505449 +[0x00001100]> p8 16 +f30f1efa31ed4989d15e4889e24883e4 ``` #### Show Hexadecimal Quad-words Dump (64 bits) -![pxq](print_modes_pxq.png) +``` +[0x00001100]> pxq +0x00001100 0x8949ed31fa1e0ff3 0xe48348e289485ed1 ....1.I..^H..H.. +0x00001110 0xc931c031455450f0 0xff000000ca3d8d48 .PTE1.1.H.=..... +0x00001120 0x2e66f400002eb315 0x0000000000841f0f ......f......... +0x00001130 0x4800002ed93d8d48 0x394800002ed2058d H.=....H......H9 +``` ### Date/Time Formats @@ -92,12 +122,12 @@ Currently supported timestamp output modes are: ``` [0x00404888]> pt? -|Usage: pt [dn] print timestamps -| pt. print current time -| pt print UNIX time (32 bit `cfg.bigendian`) Since January 1, 1970 -| ptd print DOS time (32 bit `cfg.bigendian`) Since January 1, 1980 -| pth print HFS time (32 bit `cfg.bigendian`) Since January 1, 1904 -| ptn print NTFS time (64 bit `cfg.bigendian`) Since January 1, 1601 +Usage: pt[.dhn] # Print timestamps +| pt # Print UNIX epoch time (32 bit `cfg.bigendian`, since January 1, 1970) +| pt. 
# Print the current time +| ptd # Print MS-DOS time (32 bit `cfg.bigendian`, since January 1, 1980) +| pth # Print Mac HFS time (32 bit `cfg.bigendian`, since January 1, 1904) +| ptn # Print NTFS time (64 bit `cfg.bigendian`, since January 1, 1601) ``` For example, you can 'view' the current buffer as timestamps in the ntfs time: @@ -111,7 +141,8 @@ For example, you can 'view' the current buffer as timestamps in the ntfs time: 20:05:13001 09:29:21 +0000 ``` -As you can see, the endianness affects the result. Once you have printed a timestamp, you can grep the output, for example, by year: +As you can see, the endianness affects the result. Once you have printed a timestamp, you can grep the output, for +example, by year: ``` [0x08048000]> pt ~1974 | wc -l @@ -120,7 +151,8 @@ As you can see, the endianness affects the result. Once you have printed a times 27:04:2022 16:15:43 +0000 ``` -The default date format can be configured using the `cfg.datefmt` variable. Formatting rules for it follow the well known strftime(3) format. Check the manpage for more details, but these are the most important: +The default date format can be configured using the `cfg.datefmt` variable. Formatting rules for it follow the well +known strftime(3) format. Check the manpage for more details, but these are the most important: ``` %a The abbreviated name of the day of the week according to the current locale. @@ -143,154 +175,171 @@ The default date format can be configured using the `cfg.datefmt` variable. Form ### Basic Types -There are print modes available for all basic types. If you are interested in a more complex structure, type `pf??` for format characters and `pf???` for examples: +There are print modes available for all basic types. If you are interested in a more complex structure, type `pf??` +for format characters and `pf???` for examples: ``` [0x00499999]> pf?? -|pf: pf[.k[.f[=v]]|[v]]|[n]|[0|cnt][fmt] [a0 a1 ...] 
-| Format: -| b byte (unsigned) -| B resolve enum bitfield (see t?) -| c char (signed byte) -| C byte in decimal -| d 0xHEX value (4 bytes) (see 'i' and 'x') -| D disassemble one opcode -| e temporally swap endian -| E resolve enum name (see t?) -| f float value (4 bytes) -| F double value (8 bytes) -| i signed integer value (4 bytes) (see 'd' and 'x') -| n next char specifies size of signed value (1, 2, 4 or 8 byte(s)) -| N next char specifies size of unsigned value (1, 2, 4 or 8 byte(s)) -| o octal value (4 byte) -| p pointer reference (2, 4 or 8 bytes) -| q quadword (8 bytes) -| r CPU register `pf r (eax)plop` -| s 32bit pointer to string (4 bytes) -| S 64bit pointer to string (8 bytes) -| t UNIX timestamp (4 bytes) -| T show Ten first bytes of buffer -| u uleb128 (variable length) -| w word (2 bytes unsigned short in hex) -| x 0xHEX value and flag (fd @ addr) (see 'd' and 'i') -| X show formatted hexpairs -| z null terminated string -| Z null terminated wide string -| ? data structure `pf ? (struct_name)example_name` -| * next char is pointer (honors asm.bits) -| + toggle show flags for each offset -| : skip 4 bytes -| . skip 1 byte -| ; rewind 4 bytes -| , rewind 1 byte +Usage: pf[j*q] # Show data using given format string +| pf # Show data using given format string +| pfj # Show data using given format string (JSON mode) +| pf* # Show data using given format string (rizin mode) +| pfq # Show data using given format string (quiet mode) + +Formats: +| b # byte (unsigned) +| B # resolve enum bitfield (see t?) +| c # char (signed byte) +| C # byte in decimal +| d # 0xHEX value (4 bytes) (see 'i' and 'x' formats) +| D # disassemble one opcode +| e # temporarily swap endian +| E # resolve enum name (see t?) 
+| f # float value (4 bytes) +| F # double float value (8 bytes) +| i # signed integer value (4 bytes) (see 'd' and 'x' formats) +| n # next char specifies size of signed value (1, 2, 4, or 8 byte(s)) +| N # next char specifies size of unsigned value (1, 2, 4, or 8 byte(s)) +| o # octal value (4 bytes) +| p # pointer reference (2, 4, or 8 bytes) +| q # quadword (8 bytes) +| Q # octoword (uint128_t) (16 bytes) +| r # CPU register (`pf r (eax)plop`) +| s # 32 bit pointer to string (4 bytes) +| S # 64 bit pointer to string (8 bytes) +| t # 32 bit UNIX timestamp (4 bytes) +| T # show ten first bytes of buffer +| u # uleb128 (variable length) +| w # word (2 bytes unsigned short in hex) +| x # 0xHEX value and flag (fd @ addr) (see 'd' and 'i' formats) +| X # show formatted hexpairs +| z # null terminated string +| Z # null terminated wide string +| ? # data structure `pf ? (struct_name)example_name` +| * # next char is pointer (honors 'asm.bits') +| + # toggle show flags for each offset +| : # skip 4 bytes +| . # skip 1 byte +| ; # rewind 4 bytes +| , # rewind 1 byte ``` Use triple-question-mark `pf???` to get some examples using print format strings. ``` -[0x00499999]> pf??? -|pf: pf[.k[.f[=v]]|[v]]|[n]|[0|cnt][fmt] [a0 a1 ...] -| Examples: -| pf 3xi foo bar 3-array of struct, each with named fields: 'foo' as hex, and 'bar' as int -| pf B (BitFldType)arg_name` bitfield type -| pf E (EnumType)arg_name` enum type -| pf.obj xxdz prev next size name Define the obj format as xxdz -| pf obj=xxdz prev next size name Same as above -| pf *z*i*w nb name blob Print the pointers with given labels -| pf iwq foo bar troll Print the iwq format with foo, bar, troll as the respective names for the fields -| pf 0iwq foo bar troll Same as above, but considered as a union (all fields at offset 0) -| pf.plop ? 
(troll)mystruct Use structure troll previously defined -| pfj.plop @ 0x14 Apply format object at the given offset -| pf 10xiz pointer length string Print a size 10 array of the xiz struct with its field names -| pf 5sqw string quad word Print an array with sqw struct along with its field names -| pf {integer}? (bifc) Print integer times the following format (bifc) -| pf [4]w[7]i Print an array of 4 words and then an array of 7 integers -| pf ic...?i foo bar "(pf xw yo foo)troll" yo Print nested anonymous structures -| pf ;..x Print value located 6 bytes from current offset -| pf [10]z[3]i[10]Zb Print an fixed size str, widechar, and var -| pfj +F @ 0x14 Print the content at given offset with flag -| pf n2 print signed short (2 bytes) value. Use N instead of n for printing unsigned values -| pf [2]? (plop)structname @ 0 Prints an array of structs -| pf eqew bigWord beef Swap endianness and print with given labels -| pf.foo rr (eax)reg1 (eip)reg2 Create object referencing to register values -| pf tt troll plop print time stamps with labels troll and plop +[0x00001100]> pf??? +Usage: p[=68abcdDfiImrstuxz] [arg|len] [@addr] +| p[b|B|xb] [len] ([S]) bindump N bits skipping S bytes +| p[iI][df] [len] print N ops/bytes (f=func) (see pi? and pdq) +| p[kK] [len] print key in randomart (K is for mosaic) +| p-[?][jh] [mode] bar|json|histogram blocks (mode: e?search.in) +| p2 [len] 8x8 2bpp-tiles +| p6[de] [len] base64 decode/encode +| p8[?][j] [len] 8bit hexpair list of bytes +| p=[?][bep] [N] [L] [b] show entropy/printable chars/chars bars +| pa[edD] [arg] pa:assemble pa[dD]:disasm or pae: esil from hex +| pb[?] [n] bitstream of N bits +| pB[?] [n] bitstream of N bytes +| pc[?][p] [len] output C (or python) format +| pC[aAcdDxw] [rows] print disassembly in columns (see hex.cols and pdq) +| pd[?] 
[sz] [a] [b] disassemble N opcodes (pd) or N bytes (pD) +| pf[?][.nam] [fmt] print formatted data (pf.name, pf.name $) +| pF[?][apx] print asn1, pkcs7 or x509 +| pg[?][x y w h] [cmd] create new visual gadget or print it (see pg? for details) +| ph[?][=|hash] ([len]) calculate hash for a block +| pi[?][bdefrj] [num] print instructions +| pI[?][iI][df] [len] print N instructions/bytes (f=func) +| pj[?] [len] print as indented JSON +| pm[?] [magic] print libmagic data (see pm? and /m?) +| po[?] hex print operation applied to block (see po?) +| pp[?][sz] [len] print patterns, see pp? for more help +| pr[?][glx] [len] print N raw bytes (in lines or hexblocks, 'g'unzip) +| ps[?][pwz] [len] print pascal/wide/zero-terminated strings +| pt[?][dn] [len] print different timestamps +| pu[?][w] [len] print N url encoded bytes (w=wide) +| pv[?][jh] [mode] show variable/pointer/value in memory +| px[?][owq] [len] hexdump of N bytes (o=octal, w=32bit, q=64bit) +| plf print the RzIL output of the function ``` + Some examples are below: ``` -[0x4A13B8C0]> pf i -0x00404888 = 837634441 -``` -``` -[0x4A13B8C0]> pf -0x00404888 = 837634432.000000 +[0x00001100]> pf i +0x00001100 = -98693133 + +[0x00001100]> pf f +0x00001100 = -2.05176598e+35 + ``` ### High-level Languages Views Valid print code formats for human-readable languages are: -* `pc` C -* `pc*` print 'wx' rizin commands -* `pch` C half-words (2 byte) -* `pcw` C words (4 byte) -* `pcd` C dwords (8 byte) -* `pci` C array of bytes with instructions -* `pca` GAS .byte blob -* `pcA` .bytes with instructions in comments -* `pcs` string -* `pcS` shellscript that reconstructs the bin -* `pcj` json -* `pcJ` javascript -* `pco` Objective-C -* `pcp` python -* `pck` kotlin -* `pcr` rust -* `pcv` JaVa -* `pcV` V (vlang.io) -* `pcy` yara -* `pcz` Swift - -If we need to create a .c file containing a binary blob, use the `pc` command, that creates this output. 
The default size is like in many other commands: the block size, which can be changed with the `b` command. - -We can also just temporarily override this block size by expressing it as an argument. - -``` -[0xB7F8E810]> pc 32 -#define _BUFFER_SIZE 32 -unsigned char buffer[_BUFFER_SIZE] = { -0x89, 0xe0, 0xe8, 0x49, 0x02, 0x00, 0x00, 0x89, 0xc7, 0xe8, 0xe2, 0xff, 0xff, 0xff, 0x81, 0xc3, 0xd6, 0xa7, 0x01, 0x00, 0x8b, 0x83, 0x00, 0xff, 0xff, 0xff, 0x5a, 0x8d, 0x24, 0x84, 0x29, 0xc2 }; ``` +[0x00001100]> pc? +Usage: pc[?] # Print bytes as code byte arrays. +| pc [] # Generate a C/C++ byte array. +| pch # Generate a C/C++ 16 bits array. +| pcw # Generate a C/C++ 32 bits array. +| pcd # Generate a C/C++ 64 bits array. +| pca [] # Generate a byte array in GAS assembly. +| pcA [] # Generate a byte array in GAS assembly with instructions in comments. +| pcb [] # Generate a bash script with the byte array. +| pcg [] # Generate a Golang byte array. +| pcJ [] # Generate a Java byte array. +| pcj [] # Generate a JSON byte array. +| pck [] # Generate a Kotlin byte array. +| pcn [] # Generate a NodeJS buffer. +| pco [] # Generate a Objective-C/C++ byte array. +| pcp [] # Generate a Python byte array. +| pcr [] # Generate a Rust byte array. +| pcs [] # Generate a Swift byte array. +| pcy [] # Generate a Yara match pattern. +| pc* [] # Generate a rizin commands for writing the byte array. +``` + +If we need to create a .c file containing a binary blob, use the `pc` command, that creates this output. The default +size is like in many other commands: the block size, which can be changed with the `b` command. -That cstring can be used in many programming languages, not just C. +We can also just temporarily override this block size by expressing it as an argument. 
``` -[0x7fcd6a891630]> pcs -"\x48\x89\xe7\xe8\x68\x39\x00\x00\x49\x89\xc4\x8b\x05\xef\x16\x22\x00\x5a\x48\x8d\x24\xc4\x29\xc2\x52\x48\x89\xd6\x49\x89\xe5\x48\x83\xe4\xf0\x48\x8b\x3d\x06\x1a +[0x00001100]> pc 32 +#define ARRAY_SIZE 32 +const uint8_t array[ARRAY_SIZE] = { + 0xf3, 0x0f, 0x1e, 0xfa, 0x31, 0xed, 0x49, 0x89, 0xd1, 0x5e, 0x48, 0x89, 0xe2, 0x48, 0x83, 0xe4, + 0xf0, 0x50, 0x54, 0x45, 0x31, 0xc0, 0x31, 0xc9, 0x48, 0x8d, 0x3d, 0xca, 0x00, 0x00, 0x00, 0xff +}; ``` ### Strings -Strings are probably one of the most important entry points when starting to reverse engineer a program because they usually reference information about functions' actions (asserts, debug or info messages...). Therefore, rizin supports various string formats: +Strings are probably one of the most important entry points when starting to reverse engineer a program because they +usually reference information about functions' actions (asserts, debug or info messages...). Therefore, Rizin supports +various string formats: ``` -[0x00000000]> ps? -|Usage: ps[bijqpsuwWxz+] [N] Print String -| ps print string -| ps+[j] print libc++ std::string (same-endian, ascii, zero-terminated) -| psb print strings in current block -| psi print string inside curseek -| psj print string in JSON format -| psp[j] print pascal string -| psq alias for pqs -| pss print string in screen (wrap width) -| psu[zj] print utf16 unicode (json) -| psw[j] print 16bit wide string -| psW[j] print 32bit wide string -| psx show string with escaped chars -| psz[j] print zero-terminated string +[0x00001100]> ps? +Usage: ps[?] # Print string at the current offset +| ps[j] =null # Print the autodetected string at the current offset (null->zero-terminated, block->block-terminated) +| ps+[j] # Print libc++ std::string (same-endian, ascii, zero-terminated) +| psb[q] # Print all the strings in current block +| psc[?] # Generate a C/C++ string +| psi[?] 
# Print the first string in the current block +| psp[j] =8 # Print the pascal string at the current offset +| pss[?] # Print string at the current offset in screen (wrap width) +| psm[j] # Print buffer as a utf16be string +| psM[j] # Print buffer as a utf32be string +| psn[j] # Print string with escaped new lines +| psw[j] # Print buffer as a utf16le string +| psW[j] # Print buffer as a utf32le string ``` -Most strings are zero-terminated. Below there is an example using the debugger to continue the execution of a program until it executes the 'open' syscall. When we recover the control over the process, we get the arguments passed to the syscall, pointed by %ebx. In the case of the 'open' call, it is a zero terminated string which we can inspect using `psz`. +Most strings are zero-terminated. Below there is an example using the debugger to continue the execution of a program +until it executes the 'open' syscall. When we recover control over the process, we get the arguments passed to the +syscall, pointed to by %ebx. In the case of the 'open' call, it is a zero-terminated string which we can inspect using +`psz`. ``` [0x4A13B8C0]> dcs open @@ -316,7 +365,10 @@ It is also possible to print various packed data types using the `pf` command: 0x7fff0d29da38 = 0x7fff0d29da38 -> 0x0d29f7ee /bin/ls ``` -This can be used to look at the arguments passed to a function. To achieve this, simply pass a 'format memory string' as an argument to `pf`, and temporally change the current seek position/offset using `@`. It is also possible to define arrays of structures with `pf`. To do this, prefix the format string with a numeric value. You can also define a name for each field of the structure by appending them as a space-separated arguments list. +This can be used to look at the arguments passed to a function. To achieve this, simply pass a 'format memory string' +as an argument to `pf`, and temporarily change the current seek position/offset using `@`. 
It is also possible to define +arrays of structures with `pf`. To do this, prefix the format string with a numeric value. You can also define a name +for each field of the structure by appending them as a space-separated argument list. ``` [0x4A13B8C0]> pf 2*xw pointer type @ esp @@ -361,15 +413,19 @@ $ rizin /usr/lib/gstreamer-1.0/libgsttcp.so ### Disassembly -The `pd` command is used to disassemble code. It accepts a numeric value to specify how many instructions should be disassembled. The `pD` command is similar but instead of a number of instructions, it decompiles a given number of bytes. +The `pd` command is used to disassemble code. It accepts a numeric value to specify how many instructions should be +disassembled. The `pD` command is similar but instead of a number of instructions, it disassembles a given number of +bytes. -* `d` : disassembly N opcodes count of opcodes -* `D` : asm.arch disassembler bsize bytes +* `pd` : Disassemble N instructions (can be negative) +* `pD` : Disassemble N bytes (can be negative) ``` -[0x00404888]> pd 1 - ;-- entry0: - 0x00404888 31ed xor ebp, ebp +[0x00001100]> pd 1 + ;-- entry0: + ;-- section..text: + ;-- _start: + 0x00001100 endbr64 ; [14] -r-x section size 409 named .text ``` ### Selecting Target Architecture @@ -377,59 +433,63 @@ The `pd` command is used to disassemble code. It accepts a numeric value to spec The architecture flavor for the disassembler is defined by the `asm.arch` eval variable. You can use `e asm.arch=??` to list all available architectures. ``` -[0x00005310]> e asm.arch=?? 
-_dAe _8_16 6502 LGPL3 6502/NES/C64/Tamagotchi/T-1000 CPU -_dAe _8 8051 PD 8051 Intel CPU -_dA_ _16_32 arc GPL3 Argonaut RISC Core -a___ _16_32_64 arm.as LGPL3 as ARM Assembler (use ARM_AS environment) -adAe _16_32_64 arm BSD Capstone ARM disassembler -_dA_ _16_32_64 arm.gnu GPL3 Acorn RISC Machine CPU -_d__ _16_32 arm.winedbg LGPL2 WineDBG's ARM disassembler -adAe _8_16 avr GPL AVR Atmel -adAe _16_32_64 bf LGPL3 Brainfuck -_dA_ _32 chip8 LGPL3 Chip8 disassembler -_dA_ _16 cr16 LGPL3 cr16 disassembly plugin -_dA_ _32 cris GPL3 Axis Communications 32-bit embedded processor -adA_ _32_64 dalvik LGPL3 AndroidVM Dalvik -ad__ _16 dcpu16 PD Mojang's DCPU-16 -_dA_ _32_64 ebc LGPL3 EFI Bytecode -adAe _16 gb LGPL3 GameBoy(TM) (z80-like) -_dAe _16 h8300 LGPL3 H8/300 disassembly plugin -_dAe _32 hexagon LGPL3 Qualcomm Hexagon (QDSP6) V6 -_d__ _32 hppa GPL3 HP PA-RISC -_dAe _0 i4004 LGPL3 Intel 4004 microprocessor -_dA_ _8 i8080 BSD Intel 8080 CPU -adA_ _32 java Apache Java bytecode -_d__ _32 lanai GPL3 LANAI +[0x00001100]> e asm.arch=?? 
+_dAeI 8 16 6502 LGPL3 6502/NES/C64/Tamagotchi/T-1000 CPU +adAeI 8 8051 PD 8051 Intel CPU +_dA__ 32 amd29k LGPL3 AMD 29k RISC CPU (by deroad) +_dA__ 16 32 arc GPL3 Argonaut RISC Core +adAeI 16 32 64 arm BSD Capstone ARM disassembler +a____ 16 32 64 arm.as LGPL3 as ARM Assembler (use RZ_ARM32_AS and RZ_ARM64_AS environment) (by pancake) +adAeI 8 16 avr LGPL3 AVR Atmel +adA_I 16 32 64 bf LGPL3 Brainfuck (by pancake, nibble) v4.0.0 +_dA__ 32 chip8 LGPL3 Chip8 disassembler +_dA__ 16 32 64 cil LGPL3 .NET Common Intermediate Language +_dA__ 16 cr16 LGPL3 cr16 disassembly plugin +_dA__ 32 cris GPL3 Axis Communications 32-bit embedded processor (by pancake) +adA__ 32 64 dalvik LGPL3 AndroidVM Dalvik +ad___ 16 dcpu16 PD Mojang's DCPU-16 +_dA__ 32 64 ebc LGPL3 EFI Bytecode (by Fedor Sakharov) +adAeI 16 gb LGPL3 GameBoy(TM) (z80-like) (by condret) +_dAe_ 16 h8300 LGPL3 H8/300 disassembly plugin +_dA_I 32 hexagon LGPL3 Qualcomm Hexagon (QDSP6) V6 (by Rot127) +_d___ 32 hppa GPL3 HP PA-RISC +_dA__ 4 i4004 LGPL3 Intel 4004 microprocessor +_dA__ 8 i8080 BSD Intel 8080 CPU +adA__ 32 java LGPL-3 Java bytecode disassembler (by deroad) +_d___ 32 lanai GPL3 LANAI ... ``` ### Configuring the Disassembler -There are multiple options which can be used to configure the output of the disassembler. All these options are described in `el asm.` - -``` -[0x00005310]> el asm. 
-asm.analysis: Analyze code and refs while disassembling (see analysis.strings) -asm.arch: Set the arch to be used by asm -asm.assembler: Set the plugin name to use when assembling -asm.bbline: Show empty line after every basic block -asm.bits: Word size in bits at assembler -asm.bytes: Display the bytes of each instruction -asm.bytespace: Separate hexadecimal bytes with a whitespace -asm.calls: Show callee function related info as comments in disasm -asm.capitalize: Use camelcase at disassembly -asm.cmt.col: Column to align comments -asm.cmt.flgrefs: Show comment flags associated to branch reference -asm.cmt.fold: Fold comments, toggle with Vz +There are multiple options which can be used to configure the output of the disassembler. All these options are +described in `el asm.` + +``` +[0x00001100]> el asm. + asm.analysis: Analyze code and refs while disassembling (see analysis.strings) + asm.arch: Set the arch to be used by asm + asm.assembler: Set the plugin name to use when assembling + asm.bb.line: Show empty line after every basic block + asm.bb.middle: Realign disassembly if a basic block starts in the middle of an instruction + asm.bits: Word size in bits at assembler + asm.bytes: Display the bytes of each instruction + asm.bytes.right: Display the bytes at the right of the disassembly + asm.bytes.space: Separate hexadecimal bytes with a whitespace + asm.calls: Show callee function related info as comments in disasm + asm.capitalize: Use camelcase at disassembly + asm.cmt.col: Column to align comments + asm.cmt.esil: Show ESIL expressions as comments + asm.cmt.flgrefs: Show comment flags associated to branch reference ... ``` -Currently there are 136 `asm.` configuration variables so we do not list them all. +Currently, there are 130 `asm.` configuration variables so we do not list them all. ### Disassembly Syntax -The `asm.syntax` variable is used to change the flavor of the assembly syntax used by a disassembler engine. 
To switch +between Intel and AT&T representations: ``` e asm.syntax=intel @@ -437,18 +497,24 @@ e asm.syntax=att ``` You can also check `asm.pseudo`, which is an experimental pseudocode view, -and `asm.esil` which outputs [ESIL](../disassembling/esil.md) ('Evaluable Strings Intermediate Language'). ESIL's goal is to have a human-readable representation of every opcode semantics. Such representations can be evaluated (interpreted) to emulate effects of individual instructions. +and `asm.esil` which outputs [ESIL](../disassembling/esil.md) ('Evaluable Strings Intermediate Language'). +ESIL's goal is to have a human-readable representation of every opcode's semantics. Such representations can be evaluated +(interpreted) to emulate effects of individual instructions. ### Print gadgets -In Rizin, visual gadgets allows the users to cast or display the output of a particular Rizin command anywhere on the screen while in Visual mode. -This command is unrelated with displaying [ROP Gadgets](https://book.rizin.re/refcard/intro.html#searching). +In Rizin, visual gadgets allow users to cast or display the output of a particular Rizin command anywhere on the +screen while in Visual mode. This command is unrelated to displaying [ROP Gadgets](https://book.rizin.re/refcard/intro.html#searching). + +Using the commands under `pg` (print gadgets), we can add, remove and change the location of these visual gadgets. +You can add a gadget using `pg`: -Using the commands under `pg` (print gadgets), we can add, remove and change the location of these visual gadgets. You can add a gadget using `pg`: ``` pg 10 10 10 10 ddr ``` -This will add the output of the Rizin command `ddr` be printed on the screen. 
The four arguments to be passed are the position (like coordinates) and -the height and width of the gadget you would like to see. This command requires the configuration variable `scr.gadgets` to be turned on. + +This will cause the output of the Rizin command `ddr` to be printed on the screen. The four arguments to be passed are +the position (like coordinates) and the width and height of the gadget you would like to see. This command requires +the configuration variable `scr.gadgets` to be turned on. See `pg?` for more information. diff --git a/src/basic_commands/print_modes_px.png b/src/basic_commands/print_modes_px.png deleted file mode 100644 index 0010654f..00000000 Binary files a/src/basic_commands/print_modes_px.png and /dev/null differ diff --git a/src/basic_commands/print_modes_pxq.png b/src/basic_commands/print_modes_pxq.png deleted file mode 100644 index 688fb491..00000000 Binary files a/src/basic_commands/print_modes_pxq.png and /dev/null differ diff --git a/src/basic_commands/print_modes_pxw.png b/src/basic_commands/print_modes_pxw.png deleted file mode 100644 index 57ae4ada..00000000 Binary files a/src/basic_commands/print_modes_pxw.png and /dev/null differ diff --git a/src/basic_commands/sdb.md b/src/basic_commands/sdb.md index a7665d65..ae1cef42 100644 --- a/src/basic_commands/sdb.md +++ b/src/basic_commands/sdb.md @@ -1,6 +1,8 @@ # SDB -SDB stands for String DataBase. It's a simple key-value database that only operates with strings created by pancake. It is used in many parts of rizin to have a disk and in-memory database which is small and fast to manage using it as a hashtable on steroids. +SDB stands for String DataBase. It's a simple key-value database, created by pancake, that only operates on strings. +It is used in many parts of rizin to have a disk and in-memory database which is small and fast to manage using it +as a hashtable on steroids. 
SDB is a simple string key/value database based on djb’s cdb disk storage and supports JSON and arrays introspection. @@ -16,6 +18,7 @@ SDB supports: ## Usage example + Let's create a database! ``` @@ -25,6 +28,7 @@ world ``` Using arrays: + ``` $ sdb - '[]list=1,2' '[0]list' '[0]list=foo' '[]list' '[+1]list=bar' 1 @@ -36,6 +40,7 @@ bar ``` Let's play with json: + ``` $ sdb d g='{"foo":1,"bar":{"cow":3}}' $ sdb d g?bar.cow @@ -45,6 +50,7 @@ $ sdb - user='{"id":123}' user?id=99 user?id ``` Using the command line without any disk database: + ``` $ sdb - foo=bar foo a=3 +a -a bar @@ -61,87 +67,82 @@ a=3 -a 3 ``` + Remove the database + ``` $ rm -f d - ``` ## So what ? + So, you can now do this inside your rizin sessions! -Let's take a simple binary, and check what is already _sdbized_. -``` -$ cat test.c -int main(){ - puts("Hello world\n"); -} -$ gcc test.c -o test -``` +Let's take a simple binary [hello_world](https://github.com/rizinorg/book/tree/master/examples/hello_world), +and check what is already _sdbized_. ``` -$ rizin -A ./test -[0x08048320]> # list namespaces under analysis -[0x08048320]> k ** -bin +$ rizin -A hello_world +[0x00001100]> k ** # list namespaces under analysis analysis -syscall +bin debug -``` +syscall +[0x00001100]> k bin/** +cur +fd.3 ``` -[0x08048320]> k bin/** -fd.6 -[0x08048320]> k bin/fd.6/* -archs=0:0:x86:32 -``` -The file corresponding to the sixth file descriptor is a x86\_32 binary. - -``` -[0x08048320]> k analysis/meta/* -meta.s.0x80484d0=12,SGVsbG8gd29ybGQ= -[...] -[0x08048320]> %b64- SGVsbG8gd29ybGQ= -Hello world -``` -Strings are stored encoded in base64. --- ## More Examples - List namespaces + ``` k ** ``` + List sub-namespaces + ``` k analysis/** ``` + List keys + ``` k * k analysis/* ``` + Set a key + ``` k foo=bar ``` + Get the value of a key + ``` k foo ``` List all syscalls + ``` k syscall/*~^0x ``` + List all comments + ``` k analysis/meta/*~.C. 
``` + Show a comment at given offset: + ``` k %analysis/meta/[1]meta.C.0x100005000 -``` \ No newline at end of file +``` diff --git a/src/basic_commands/sections.md b/src/basic_commands/sections.md index 9c3aca20..59148815 100644 --- a/src/basic_commands/sections.md +++ b/src/basic_commands/sections.md @@ -1,58 +1,84 @@ ## Sections -The concept of sections is tied to the information extracted from the binary. We can display this information by using the `i` command. +The concept of sections is tied to the information extracted from the binary. We can display this information by using +the `i` command. Displaying information about sections: ``` -[0x00005310]> iS -[Sections] -00 0x00000000 0 0x00000000 0 ---- -01 0x00000238 28 0x00000238 28 -r-- .interp -02 0x00000254 32 0x00000254 32 -r-- .note.ABI_tag -03 0x00000278 176 0x00000278 176 -r-- .gnu.hash -04 0x00000328 3000 0x00000328 3000 -r-- .dynsym -05 0x00000ee0 1412 0x00000ee0 1412 -r-- .dynstr -06 0x00001464 250 0x00001464 250 -r-- .gnu.version -07 0x00001560 112 0x00001560 112 -r-- .gnu.version_r -08 0x000015d0 4944 0x000015d0 4944 -r-- .rela.dyn -09 0x00002920 2448 0x00002920 2448 -r-- .rela.plt -10 0x000032b0 23 0x000032b0 23 -r-x .init -... 
+[0x00001100]> iS +paddr size vaddr vsize align perm name type flags +----------------------------------------------------------------------------------------- +0x00000000 0x0 ---------- 0x0 0x0 ---- NULL +0x00000318 0x1c 0x00000318 0x1c 0x0 -r-- .interp PROGBITS alloc +0x00000338 0x50 0x00000338 0x50 0x0 -r-- .note.gnu.property NOTE alloc +0x00000388 0x20 0x00000388 0x20 0x0 -r-- .note.ABI-tag NOTE alloc +0x000003a8 0x24 0x000003a8 0x24 0x0 -r-- .gnu.hash GNU_HASH alloc +0x000003d0 0x120 0x000003d0 0x120 0x0 -r-- .dynsym DYNSYM alloc +0x000004f0 0xae 0x000004f0 0xae 0x0 -r-- .dynstr STRTAB alloc +0x0000059e 0x18 0x0000059e 0x18 0x0 -r-- .gnu.version VERSYM alloc +0x000005b8 0x30 0x000005b8 0x30 0x0 -r-- .gnu.version_r VERNEED alloc +0x000005e8 0xc0 0x000005e8 0xc0 0x0 -r-- .rela.dyn RELA alloc +0x000006a8 0x90 0x000006a8 0x90 0x0 -r-- .rela.plt RELA alloc,info +0x00001000 0x1b 0x00001000 0x1b 0x0 -r-x .init PROGBITS alloc,execute +0x00001020 0x70 0x00001020 0x70 0x0 -r-x .plt PROGBITS alloc,execute +0x00001090 0x10 0x00001090 0x10 0x0 -r-x .plt.got PROGBITS alloc,execute +0x000010a0 0x60 0x000010a0 0x60 0x0 -r-x .plt.sec PROGBITS alloc,execute +0x00001100 0x199 0x00001100 0x199 0x0 -r-x .text PROGBITS alloc,execute +0x0000129c 0xd 0x0000129c 0xd 0x0 -r-x .fini PROGBITS alloc,execute +0x00002000 0x12 0x00002000 0x12 0x0 -r-- .rodata PROGBITS alloc +0x00002014 0x34 0x00002014 0x34 0x0 -r-- .eh_frame_hdr PROGBITS alloc +0x00002048 0xac 0x00002048 0xac 0x0 -r-- .eh_frame PROGBITS alloc +0x00002d90 0x8 0x00003d90 0x8 0x0 -rw- .init_array INIT_ARRAY write,alloc +0x00002d98 0x8 0x00003d98 0x8 0x0 -rw- .fini_array FINI_ARRAY write,alloc +0x00002da0 0x1f0 0x00003da0 0x1f0 0x0 -rw- .dynamic DYNAMIC write,alloc +0x00002f90 0x70 0x00003f90 0x70 0x0 -rw- .got PROGBITS write,alloc +0x00003000 0x10 0x00004000 0x10 0x0 -rw- .data PROGBITS write,alloc +0x00003010 0x0 0x00004010 0x8 0x0 -rw- .bss NOBITS write,alloc +0x00003010 0x65 ---------- 0x65 0x0 ---- .comment PROGBITS 
merge,strings +0x00003078 0x2b8 ---------- 0x2b8 0x0 ---- .symtab SYMTAB +0x00003330 0x18a ---------- 0x18a 0x0 ---- .strtab STRTAB +0x000034ba 0x107 ---------- 0x107 0x0 ---- .shstrtab STRTAB ``` -As you may know, binaries have sections and maps. The sections define the contents of a portion of the file that can be mapped in memory (or not). What is mapped is defined by the segments. +As you may know, binaries have sections and maps. The sections define the contents of a portion of the file that can be +mapped in memory (or not). What is mapped is defined by the segments. -Before the IO refactoring done by condret, the `S` command was used to manage what we now call maps. Currently the `S` command is deprecated because `iS` and `om` should be enough. - -Firmware images, bootloaders and binary files usually place various sections of a binary at different addresses in memory. To represent this behavior, rizin offers the `iS`. Use `iS?` to get the help message. To list all created sections use `iS` (or `iSj` to get the json format). The `iS=` will show the region bars in ascii-art. +Firmware images, bootloaders and binary files usually place various sections of a binary at different addresses in +memory. To represent this behavior, rizin offers the `iS`. Use `iS?` to get the help message. To list all created +sections use `iS` (or `iSj` to get the json format). The `iS=` will show the region bars in ascii-art. 
You can create a new mapping using the `om` subcommand as follows: + ``` om fd vaddr [size] [paddr] [rwx] [name] ``` For Example: ``` -[0x0040100]> om 4 0x00000100 0x00400000 0x0001ae08 rwx test +[0x00001100]> om 4 0x00000100 0x00400000 0x0001ae08 rwx test ``` You can also use `oml` command to view information about mapped sections: ``` -[0x00401000]> oml - 6 fd: 4 +0x0001ae08 0x00000100 - 0x004000ff rwx test - 5 fd: 3 +0x00000000 0x00000000 - 0x0000055f r-- fmap.LOAD0 - 4 fd: 3 +0x00001000 0x00001000 - 0x000011e4 r-x fmap.LOAD1 - 3 fd: 3 +0x00002000 0x00002000 - 0x0000211f r-- fmap.LOAD2 - 2 fd: 3 +0x00002de8 0x00003de8 - 0x0000402f r-- fmap.LOAD3 - 1 fd: 4 +0x00000000 0x00004030 - 0x00004037 rw- mmap.LOAD3 +[0x00001100]> oml + 1 fd: 4 +0x00000000 0x00004020 - 0x0000408f r-- vmap.reloc-targets + 2 fd: 3 +0x00000000 0x00000000 - 0x00000737 r-- fmap.LOAD0 + 3 fd: 3 +0x00001000 0x00001000 - 0x000012a8 r-x fmap.LOAD1 + 4 fd: 3 +0x00002000 0x00002000 - 0x000020f3 r-- fmap.LOAD2 + 5 fd: 5 +0x00000000 0x00004010 - 0x00004017 rw- mmap.LOAD3 + 6 fd: 6 +0x00002d90 0x00003d90 - 0x0000400f r-- vmap.LOAD3 + 7 fd: 4 +0x0001ae08 0x00000100 * 0x004000ff r-x test ``` -Use `om?` to get all the possible subcommands. To list all the defined maps use `oml` (or `omlj` to get the json format). To get the ascii art view use `oml=`. + +Use `om?` to get all the possible subcommands. To list all the defined maps use `oml` (or `omlj` to get the json +format). To get the ascii art view use `oml=`. It is also possible to delete the mapped section using the `om- mapid` command. For Example: + ``` -[0x00401000]> om- 6 +[0x00001100]> om- 7 ``` diff --git a/src/basic_commands/seeking.md b/src/basic_commands/seeking.md index 186d45e6..85cf4585 100644 --- a/src/basic_commands/seeking.md +++ b/src/basic_commands/seeking.md @@ -2,7 +2,8 @@ To move around the file we are inspecting we will need to change the offset at which we are using the `s` command. 
-The argument is a math expression that can contain flag names, parenthesis, addition, subtraction, multiplication of immediates of contents of memory using brackets. +The argument is a math expression that can contain flag names, parentheses, addition, subtraction, multiplication of +immediates or contents of memory using brackets. Some example commands: @@ -20,15 +21,18 @@ The second does a relative seek 4 bytes forward. And finally, the last 2 commands are undoing, and redoing the last seek operations in the seek history. -Instead of using just numbers, we can use complex expressions, or basic arithmetic operations to represent the address to seek. +Instead of using just numbers, we can use complex expressions, or basic arithmetic operations to represent the address +to seek. -To do this, check the ?$? Help message which describes the internal variables that can be used in the expressions. For example, this is the same as doing `sd +4` . +To do this, check the `?$?` help message which describes the internal variables that can be used in the expressions. For +example, this is the same as doing `sd +4` . ``` [0x00000000]> s $$+4 ``` -From the debugger (or when emulating) we can also use the register names as references. They are loaded as flags with the `.dr*` command, which happens under the hood. +From the debugger (or when emulating) we can also use the register names as references. They are loaded as flags with +the `.dr*` command, which happens under the hood. ``` [0x00000000]> s rsp+0x40 @@ -63,21 +67,61 @@ Usage: s[?] # Seek commands > s 10+0x80 ; seek at 0x80+10 ``` -If you want to inspect the result of a math expression, you can evaluate it using the `?` command. Simply pass the expression as an argument. The result can be displayed in hexadecimal, decimal, octal or binary formats. +If you want to inspect the result of a math expression, you can evaluate it using the `%` command. Simply pass the +expression as an argument. 
The result can be displayed in hexadecimal, decimal, octal or binary formats. ``` > % 0x100+200 0x1C8 ; 456d ; 710o ; 1100 1000 ``` -There are also subcommands of `%` that display the output in one specific format (base 10, base 16 ,...). See `%v` for instance. +There are also subcommands of `%` that display the output in one specific format (base 10, base 16 ,...). See `%v` for +instance. -In the visual mode, you can press `u` (undo) or `U` (redo) inside the seek history to return back to previous or forward to the next location. +In the visual mode, you can press `u` (undo) or `U` (redo) inside the seek history to return back to previous or +forward to the next location. ## Open file -As a test file, let's use a simple `hello_world.c` compiled in Linux ELF format. -After we compile it let's open it with rizin: +As a test file, let's use a simple [hello_world](https://github.com/rizinorg/book/tree/master/examples/hello_world) +compiled in Linux ELF 64-bit format: + +```cpp +#include +#include +#include + +/* + * This is simple hello_world program made for education purposes. + * Licensed under CC-BY-SA 4.0 license. + * + * In order to compile executable run: + * + * gcc -o hello_world hello_world.c + * + */ + +int main(int argc, char* argv[]) { + const char *str1 = "Hello "; + const char *str2 = "world!"; + + size_t str1_size = strlen(str1); + size_t str2_size = strlen(str2); + + char *output = malloc(str1_size + str2_size + 1); + if (output) { + strcpy(output, str1); + strcat(output, str2); + + puts(output); + free(output); + } + + return 0; +} +``` + +After we compile it with `gcc -o hello_world hello_world.c` let's open it with rizin: ``` $ rizin hello_world @@ -86,7 +130,7 @@ $ rizin hello_world Now we have the command prompt: ``` -[0x00400410]> +[0x00001100]> ``` And it is time to go deeper. @@ -99,7 +143,7 @@ such as hex, octal, binary or decimal. Seek to an address 0x0. 
An alternative command is simply `0x0` ``` -[0x00400410]> s 0x0 +[0x00001100]> s 0x0 [0x00000000]> ``` @@ -124,26 +168,20 @@ Undo last two seeks to return to the initial address: ``` [0x00000080]> shu [0x00000000]> shu -[0x00400410]> +[0x00001100]> ``` -We are back at _0x00400410_. +We are back at `0x00001100`. There's also a command to show the seek history: ``` -[0x00400410]> sh -0x400410 -0x40041a -0x400410 -0x400411 -0x400410 # current seek -0x4005b4 # redo +[0x00001100]> sh +0x1100 entry0 # current seek +0x0 segment.LOAD0 # redo +0x80 segment.PHDR+64 # redo [0x00400410]> sh* -f undo_3 @ 0x400410 -f undo_2 @ 0x40041a -f undo_1 @ 0x400410 -f undo_0 @ 0x400411 -# Current seek @ 0x400410 -f redo_0 @ 0x4005b4 -``` \ No newline at end of file +# Current seek @ 0x1100 +f redo_0 @ 0x0 +f redo_1 @ 0x80 +``` diff --git a/src/basic_commands/types.md b/src/basic_commands/types.md deleted file mode 100644 index 1cb26271..00000000 --- a/src/basic_commands/types.md +++ /dev/null @@ -1,46 +0,0 @@ -## Working with data types - -Rizin can also work with data types. You can use standard C data types or define your own using C. Currently, there is a support for structs, unions, function signatures, and enums. - -``` -[0x00000000]> t? -Usage: t[?] # Types, noreturn, signatures, C parser and more -| t[j*] [] # List all types / Show type information -| t- # Remove the type -| t-* # Remove all types -| tc[dc] # List loaded types in C format -| td # Define type from C definition -| te[jbcdf] # List loaded enums -| tf[j-c?] # List loaded functions definitions -| tn[j-?] # Manage noreturn function attributes and marks -| to[es] # Open C header file and load types from it -| tp[vx] # Print formatted type casted to the address -| ts[jlcd] # List loaded structures -| tt[jc] # List loaded typedefs -| tu[jlcd] # List loaded unions -| tx[fgl] # Type xrefs -``` - -### Defining new types - -There are three different methods to define new types: - -1. 
Defining a new type from rizin shell immediately, to do this you will use `td` command, and put the whole line between double quotes. For example: - -`td "struct person {int age; char *name; char *address;};"` - -2. You can also use `to -` to open a text editor and write your own types in there. This is preferable when you got too many types to define. - -3. Rizin also supports loading header files using the command `to` followed by a path to the header file you want to load. - -You can View loaded types in rizin using `ts` for structures, `tu` for unions, `tf` for function signatures, `te` for enums. - -You can also cast pointers to data types and view data in there accordingly with `tp`. EX: - -``` -[0x00400511]> tp person = 0x7fff170a46b0 - age : 0x7fff170a46b0 = 20 - name : (*0x4005b0) 0x7fff170a46b4 = My name - address : (*0x4005b8) 0x7fff170a46bc = My age -[0x00400511]> -``` \ No newline at end of file diff --git a/src/basic_commands/write.md b/src/basic_commands/write.md index a389e3f8..a32dc7fb 100644 --- a/src/basic_commands/write.md +++ b/src/basic_commands/write.md @@ -2,15 +2,20 @@ To be able to use Rizin in write mode, you need to load your binary as such: `rizin -w bin`. -Rizin can manipulate a loaded binary file in many ways. You can resize the file, move and copy/paste bytes, insert new bytes (shifting data to the end of the block or file), or simply overwrite bytes. New data may be given as a wide-string, assembler instructions, or the data may be read in from another file. +Rizin can manipulate a loaded binary file in many ways. You can resize the file, move and copy/paste bytes, insert +new bytes (shifting data to the end of the block or file), or simply overwrite bytes. New data may be given as +a wide-string, assembler instructions, or the data may be read in from another file. -Resize the file using the `r` command. It accepts a numeric argument. A positive value sets a new size for the file. 
A negative one will truncate the file to the current seek position minus N bytes. +Resize the file using the `r` command. It accepts a numeric argument. A positive value sets a new size for the file. +A negative one will truncate the file to the current seek position minus N bytes. ``` r 1024 ; resize the file to 1024 bytes r -10 @ 33 ; strip 10 bytes at offset 33 ``` -Write bytes using the `w` command. It accepts multiple input formats like inline assembly, endian-friendly dwords, files, hexpair files, wide strings: + +Write bytes using the `w` command. It accepts multiple input formats like inline assembly, endian-friendly dwords, +files, hexpair files, wide strings: ``` [0x00404888]> w? @@ -78,7 +83,8 @@ Examples: | wo4 # Content before: 1122334455667788; Content after: 4433221188776655 ``` -It is possible to implement cipher-algorithms using rizin core primitives and `wo`. A sample session performing xor(90) + add(01, 02): +It is possible to implement cipher-algorithms using rizin core primitives and `wo`. A sample session performing +xor(90) + add(01, 02): ``` [0x7fcd6a891630]> px @@ -101,4 +107,4 @@ It is possible to implement cipher-algorithms using rizin core primitives and `w 0x7fcd6a891640 91de 1a7e d91f 96db 14d9 9593 1401 9593 0x7fcd6a891650 c4da 1a6d e89a d959 9192 9159 1cb1 d959 0x7fcd6a891660 9192 79cb 81da 1652 81da 1456 a252 7c77 -``` \ No newline at end of file +``` diff --git a/src/basic_commands/yank_paste.md b/src/basic_commands/yank_paste.md index 99908bf9..27830603 100644 --- a/src/basic_commands/yank_paste.md +++ b/src/basic_commands/yank_paste.md @@ -9,9 +9,11 @@ The two basic operations are * copy (yank) * paste -The yank operation will read N bytes (specified by the argument) into the clipboard. We can later use the `yy` command to paste what we read before into a file. +The yank operation will read N bytes (specified by the argument) into the clipboard. We can later use the `yy` command +to paste what we read before into a file. 
-You can yank/paste bytes in visual mode selecting them with the cursor mode (`Vc`) and then using the `y` and `Y` key bindings which are aliases for `y` and `yy` commands of the command-line interface. +You can yank/paste bytes in visual mode selecting them with the cursor mode (`Vc`) and then using the `y` and `Y` key +bindings which are aliases for `y` and `yy` commands of the command-line interface. ``` [0x00000000]> y? diff --git a/src/crackmes/avatao/01-reverse4/bytecode.md b/src/crackmes/avatao/01-reverse4/bytecode.md index ebc465a3..3233c822 100644 --- a/src/crackmes/avatao/01-reverse4/bytecode.md +++ b/src/crackmes/avatao/01-reverse4/bytecode.md @@ -1,5 +1,4 @@ -.bytecode ---------- +## .bytecode Well, we did the reverse engineering part, now we have to write a program for the VM with the instruction set described in the previous paragraph. Here is diff --git a/src/crackmes/avatao/01-reverse4/first_steps.md b/src/crackmes/avatao/01-reverse4/first_steps.md index 61e536ec..d69b35cd 100644 --- a/src/crackmes/avatao/01-reverse4/first_steps.md +++ b/src/crackmes/avatao/01-reverse4/first_steps.md @@ -1,5 +1,4 @@ -.first_steps ------------- +## .first_steps OK, enough of praising rizin, lets start reversing this stuff. First, you have to know your enemy: @@ -34,7 +33,7 @@ binsz 8620 > be used to extract information (imports, symbols, libraries, etc.) about > binary executables. As always, check the help (rz-bin -h)! -So, its a dynamically linked, stripped, 64bit Linux executable - nothing fancy +So, it's a dynamically linked, stripped, 64bit Linux executable - nothing fancy here. Let's try to run it: ``` @@ -92,7 +91,7 @@ We can list all the strings rizin found: [0x00400720]> ``` -> ***rizin tip***: rizin puts so called flags on important/interesting offsets, and +> ***rizin tip***: rizin puts so-called flags on important/interesting offsets, and > organizes these flags into flagspaces (strings, functions, symbols, etc.) 
You > can list all flagspaces using *fs*, and switch the current one using > *fs [flagspace]* (the default is \*, which means all the flagspaces). The diff --git a/src/crackmes/avatao/01-reverse4/instructionset.md b/src/crackmes/avatao/01-reverse4/instructionset.md index d320d6a9..8f832868 100644 --- a/src/crackmes/avatao/01-reverse4/instructionset.md +++ b/src/crackmes/avatao/01-reverse4/instructionset.md @@ -1,21 +1,20 @@ -.instructionset ---------------- +## .instructionset We've now reversed all the VM instructions, and have a full understanding about how it works. Here is the VM's instruction set: -| Instruction | 1st arg | 2nd arg | What does it do? -| ----------- | ------- | ------- | ---------------- -| "A" | "M" | arg2 | \*sym.current_memory_ptr += arg2 -| | "P" | arg2 | sym.current_memory_ptr += arg2 -| | "C" | arg2 | sym.written_by_instr_C += arg2 -| "S" | "M" | arg2 | \*sym.current_memory_ptr -= arg2 -| | "P" | arg2 | sym.current_memory_ptr -= arg2 -| | "C" | arg2 | sym.written_by_instr_C -= arg2 -| "I" | arg1 | n/a | instr_A(arg1, 1) -| "D" | arg1 | n/a | instr_S(arg1, 1) -| "P" | arg1 | n/a | \*sym.current_memory_ptr = arg1; instr_I("P") -| "X" | arg1 | n/a | \*sym.current_memory_ptr ^= arg1 +| Instruction | 1st arg | 2nd arg | What does it do? | +|-------------|---------|---------|-----------------------------------------------| +| "A" | "M" | arg2 | \*sym.current_memory_ptr += arg2 | +| | "P" | arg2 | sym.current_memory_ptr += arg2 | +| | "C" | arg2 | sym.written_by_instr_C += arg2 | +| "S" | "M" | arg2 | \*sym.current_memory_ptr -= arg2 | +| | "P" | arg2 | sym.current_memory_ptr -= arg2 | +| | "C" | arg2 | sym.written_by_instr_C -= arg2 | +| "I" | arg1 | n/a | instr_A(arg1, 1) | +| "D" | arg1 | n/a | instr_S(arg1, 1) | +| "P" | arg1 | n/a | \*sym.current_memory_ptr = arg1; instr_I("P") | +| "X" | arg1 | n/a | \*sym.current_memory_ptr ^= arg1 | | "J" | arg1 | n/a | arg1_and_0x3f = arg1 & 0x3f;
if ((arg1 & 0x40) != 0)
  arg1_and_0x3f \*= -1
if (arg1 >= 0) return arg1_and_0x3f;
else if (\*sym.written_by_instr_C != 0) {
  if (arg1_and_0x3f < 0)
    ++\*sym.good_if_ne_zero;
  return arg1_and_0x3f;
} else return 2; | -| "C" | arg1 | n/a | \*sym.written_by_instr_C = arg1 -| "R" | arg1 | n/a | return(arg1) +| "C" | arg1 | n/a | \*sym.written_by_instr_C = arg1 | +| "R" | arg1 | n/a | return(arg1) | diff --git a/src/crackmes/avatao/01-reverse4/intro.md b/src/crackmes/avatao/01-reverse4/intro.md index 509d5ae3..e7864cab 100644 --- a/src/crackmes/avatao/01-reverse4/intro.md +++ b/src/crackmes/avatao/01-reverse4/intro.md @@ -1,5 +1,4 @@ -Avatao R3v3rs3 4 ------- +## Avatao R3v3rs3 4 After a few years of missing out on wargames at [Hacktivity](https://hacktivity.com), this year I've finally found the time to diff --git a/src/crackmes/avatao/01-reverse4/main.md b/src/crackmes/avatao/01-reverse4/main.md index 5e2a7847..8259bf07 100644 --- a/src/crackmes/avatao/01-reverse4/main.md +++ b/src/crackmes/avatao/01-reverse4/main.md @@ -1,5 +1,4 @@ -.main ------ +## .main As I was saying, I usually take a look at the entry point, so let's just do that: @@ -46,12 +45,12 @@ look at a function: > It is possible to bring up the prompt in visual mode using the *:* key, and > you can use *o* to seek. -Lets read main node-by-node! The first block looks like this: +Let's read main node-by-node! The first block looks like this: ![main bb-0c63](img/main/bb-0c63.png) We can see that the program reads a word (2 bytes) into the local variable named -*local_10_6*, and than compares it to 0xbb8. That's 3000 in decimal: +*local_10_6*, and then compares it to 0xbb8. That's 3000 in decimal: ``` [0x00400c63]> % 0xbb8 @@ -179,7 +178,7 @@ the bytecode, and exits: OK, so now we know that we have to supply a bytecode that will generate that string when executed. As we can see on the minimap, there are still a few more -branches ahead, which probably means more conditions to meet. Lets investigate +branches ahead, which probably means more conditions to meet. Let's investigate them before we delve into *vmloop*! 
If you take a look at the minimap of the whole function, you can probably @@ -239,7 +238,7 @@ more checks: This piece of code may look a bit strange if you are not familiar with x86_64 specific stuff. In particular, we are talking about RIP-relative addressing, where offsets are described as displacements from the current instruction -pointer, which makes implementing PIE easier. Anyways, rizin is nice enough to +pointer, which makes implementing PIE easier. Anyway, rizin is nice enough to display the actual address (0x602104). Got the address, flag it! ``` diff --git a/src/crackmes/avatao/01-reverse4/outro.md b/src/crackmes/avatao/01-reverse4/outro.md index 97dbaefd..0931e5f4 100644 --- a/src/crackmes/avatao/01-reverse4/outro.md +++ b/src/crackmes/avatao/01-reverse4/outro.md @@ -1,5 +1,4 @@ -.outro ------- +## .outro Well, what can I say? Such VM, much reverse! :) diff --git a/src/crackmes/avatao/01-reverse4/rizin.md b/src/crackmes/avatao/01-reverse4/rizin.md index dec29085..dbdeda8a 100644 --- a/src/crackmes/avatao/01-reverse4/rizin.md +++ b/src/crackmes/avatao/01-reverse4/rizin.md @@ -1,8 +1,7 @@ -.rizin --------- +## .rizin I've decided to solve the reversing challenges using -[rizin](http://www.rizin.org/r/), a free and open source reverse engineering +[rizin](http://rizin.re), a free and open source reverse engineering framework. I have first learned about rizin back in 2011. during a huge project, where I had to reverse a massive, 11MB statically linked ELF. I simply needed something that I could easily patch Linux ELFs with. Granted, back then I've diff --git a/src/crackmes/avatao/01-reverse4/vmloop.md b/src/crackmes/avatao/01-reverse4/vmloop.md index d439f65c..38cd0d24 100644 --- a/src/crackmes/avatao/01-reverse4/vmloop.md +++ b/src/crackmes/avatao/01-reverse4/vmloop.md @@ -1,5 +1,4 @@ -.vmloop -------- +## .vmloop ``` [offset]> fcn.vmloop @@ -24,7 +23,7 @@ First, lets analyze what we already have! First, *rdi* is put into local_3. 
Since the application is a 64bit Linux executable, we know that *rdi* is the first function argument (as you may have recognized, the automatic analysis of arguments and local variables was not entirely correct), and we also know that -*vmloop*'s first argument is the bytecode. So lets rename local_3: +*vmloop*'s first argument is the bytecode. So let's rename local_3: ``` :> afvn local_3 bytecode @@ -102,7 +101,7 @@ This is how the disassembly looks like after we add this metadata: ``` As we can see, the address 0x400c04 is used a lot, and besides that there are 9 -different addresses. Lets see that 0x400c04 first! +different addresses. Let's see that 0x400c04 first! ![vmloop bb-0c04](img/vmloop/bb-0c04.png) @@ -161,7 +160,7 @@ how we can create the missing basic blocks for the instructions: ``` It is also apparent from the disassembly that besides the instructions there -are three more basic blocks. Lets create them too! +are three more basic blocks. Let's create them too! ``` [0x00400ec0]> afb+ 0x00400a45 0x00400c15 0x00400c2d-0x00400c15 0x400c3c 0x00400c2d @@ -187,7 +186,7 @@ By the way, here is how IDA's graph of this same function looks like for compari ![IDA graph](img/vmloop_ida.png) As we browse through the disassembly of the *instr_LETTER* basic blocks, we -should realize a few things. The first: all of the instructions starts with a +should realize a few things. The first: all the instructions start with a sequence like these: ![vmloop bb-0a80](img/vmloop/bb-0a80.png) @@ -196,9 +195,9 @@ sequence like these: It became clear now that the 9 dwords at *sym.instr_dirty* are not simply indicators that an instruction got executed, but they are used to count how many -times an instruction got called. Also I should have realized earlier that +times an instruction got called. Also, I should have realized earlier that *sym.good_if_le_9* (0x6020f0) is part of this 9 dword array, but yeah, well, I -didn't, I have to live with it... 
Anyways, what the condition +didn't, I have to live with it... Anyway, what the condition "*sym.good_if_le_9* have to be lesser or equal 9" really means is that *instr_P* can not be executed more than 9 times: @@ -249,7 +248,7 @@ that address! ``` Oh, and by the way, I do have a hunch that *instr_C* also had a function call in -the original code, but it got inlined by the compiler. Anyways, so far we have +the original code, but it got inlined by the compiler. Anyway, so far we have these two instructions: - *instr_R(a1):* returns with *a1* @@ -396,9 +395,9 @@ not the case here - e.g. the larger grey boxes are clearly not identical. This is something I'm definitely going to take a deeper look at after I've finished this writeup. -Anyways, after we get over the shock of being lied to, we can easily recognize +Anyway, after we get over the shock of being lied to, we can easily recognize that *instr_S* is basically a reverse-*instr_A*: where the latter does addition, -the former does subtraction. To summarize this: +the former does subtraction. To summarize this: - *arg1* == "M": subtracts *arg2* from the byte at *sym.current_memory_ptr*. - *arg1* == "P": steps *sym.current_memory_ptr* backwards by *arg2* bytes. @@ -433,8 +432,8 @@ It's local var rename time again! This function is pretty straightforward also, but there is one oddity: const_M is never used. I don't know why it is there - maybe it is supposed to be some -kind of distraction? Anyways, this function simply writes *arg1* to -*sym.current_memory_ptr*, and than calls *instr_I("P")*. This basically means +kind of distraction? Anyway, this function simply writes *arg1* to +*sym.current_memory_ptr*, and then calls *instr_I("P")*. This basically means that *instr_P* is used to write one byte, and put the pointer to the next byte. So far this would seem the ideal instruction to construct most of the "Such VM! MuCH reV3rse!" 
string, but remember, this is also the one that can be used only @@ -442,7 +441,7 @@ MuCH reV3rse!" string, but remember, this is also the one that can be used only ###instr_X -Another simple one, rename local vars anyways! +Another simple one, rename local vars anyway! ``` :> afvn local_1 arg1 diff --git a/src/crackmes/hackthebox/find-the-easy-pass/identification.md b/src/crackmes/hackthebox/find-the-easy-pass/identification.md index 78d1501e..f1db81ba 100644 --- a/src/crackmes/hackthebox/find-the-easy-pass/identification.md +++ b/src/crackmes/hackthebox/find-the-easy-pass/identification.md @@ -3,7 +3,7 @@ After un-compressing the challenge file `Find The Easy Pass.zip`, we can find a file named `EasyPass.exe` inside it. -We using `rz-bin` to identify the executable file. +We're using `rz-bin` to identify the executable file. ```bash C:\Users\User\Desktop\htb>rz-bin -I EasyPass.exe diff --git a/src/crackmes/intro.md b/src/crackmes/intro.md index 694945ef..65a3a1f8 100644 --- a/src/crackmes/intro.md +++ b/src/crackmes/intro.md @@ -1,5 +1,5 @@ Crackmes ======== -Crackmes (from "crack me" challenge) are the training ground for reverse engineering people. This section will go over tutorials on how to defeat various crackmes using rizin. - +Crackmes (from "crack me" challenge) are the training ground for reverse engineering people. This section will go over +tutorials on how to defeat various crackmes using Rizin. diff --git a/src/crackmes/ioli/intro.md b/src/crackmes/ioli/intro.md index da09aefc..d79d992c 100644 --- a/src/crackmes/ioli/intro.md +++ b/src/crackmes/ioli/intro.md @@ -1,6 +1,8 @@ IOLI CrackMes ============= -The IOLI crackme is a good starting point for learning rizin. This is a set of tutorials based on the tutorial at [dustri](https://dustri.org/b/defeating-ioli-with-radare2.html) +The IOLI crackme is a good starting point for learning Rizin. 
This is a set of tutorials based on the tutorial +at [dustri](https://dustri.org/b/defeating-ioli-with-radare2.html) -The IOLI crackmes are available at a locally hosted [mirror](https://github.com/rizinorg/book/raw/master/src/crackmes/ioli/IOLI-crackme.tar.gz) +The IOLI crackmes are available at a locally hosted +[mirror](https://github.com/rizinorg/book/raw/master/src/crackmes/ioli/IOLI-crackme.tar.gz) diff --git a/src/crackmes/ioli/ioli_0x00.md b/src/crackmes/ioli/ioli_0x00.md index 0e99a2e7..7b3dc9b9 100644 --- a/src/crackmes/ioli/ioli_0x00.md +++ b/src/crackmes/ioli/ioli_0x00.md @@ -1,5 +1,4 @@ -IOLI 0x00 -========= +# IOLI 0x00 This is the first IOLI crackme, and the easiest one. @@ -10,15 +9,16 @@ Password: 1234 Invalid Password! ``` -The first thing to check is if the password is just plaintext inside the file. In this case, we don't need to do any disassembly, and we can just use rz-bin with the -z flag to search for strings in the binary. +The first thing to check is if the password is just plaintext inside the file. In this case, we don't need to do +any disassembly, and we can just use rz-bin with the -z flag to search for strings in the binary. 
``` $ rz-bin -z ./crackme0x00 [Strings] -nth paddr vaddr len size section type string -――――――――――――――――――――――――――――――――――――――――――――――――――――――― +nth paddr vaddr len size section type string +--------------------------------------------------------------------------- 0 0x00000568 0x08048568 24 25 .rodata ascii IOLI Crackme Level 0x00\n -1 0x00000581 0x08048581 10 11 .rodata ascii Password: +1 0x00000581 0x08048581 10 11 .rodata ascii Password: 2 0x0000058f 0x0804858f 6 7 .rodata ascii 250382 3 0x00000596 0x08048596 18 19 .rodata ascii Invalid Password!\n 4 0x000005a9 0x080485a9 15 16 .rodata ascii Password OK :)\n diff --git a/src/crackmes/ioli/ioli_0x01.md b/src/crackmes/ioli/ioli_0x01.md index 16592e59..8a74f357 100644 --- a/src/crackmes/ioli/ioli_0x01.md +++ b/src/crackmes/ioli/ioli_0x01.md @@ -1,5 +1,4 @@ -IOLI 0x01 -========= +# IOLI 0x01 This is the second IOLI crackme. @@ -15,10 +14,10 @@ Let's check for strings with rz-bin. ``` $ rz-bin -z ./crackme0x01 [Strings] -nth paddr vaddr len size section type string -――――――――――――――――――――――――――――――――――――――――――――――――――――――― +nth paddr vaddr len size section type string +--------------------------------------------------------------------------- 0 0x00000528 0x08048528 24 25 .rodata ascii IOLI Crackme Level 0x01\n -1 0x00000541 0x08048541 10 11 .rodata ascii Password: +1 0x00000541 0x08048541 10 11 .rodata ascii Password: 2 0x0000054f 0x0804854f 18 19 .rodata ascii Invalid Password!\n 3 0x00000562 0x08048562 15 16 .rodata ascii Password OK :)\n ``` @@ -27,64 +26,58 @@ This isn't going to be as easy as 0x00. Let's try disassembly with rizin. 
``` $ rizin ./crackme0x01 --- Use `zoom.byte=printable` in zoom mode ('z' in Visual mode) to find strings [0x08048330]> aa [0x08048330]> pdf @ main ; DATA XREF from entry0 @ 0x8048347 -/ 113: int main (int argc, char **argv, char **envp); -| ; var int32_t var_4h @ ebp-0x4 -| ; var int32_t var_sp_4h @ esp+0x4 -| 0x080483e4 55 push ebp -| 0x080483e5 89e5 mov ebp, esp -| 0x080483e7 83ec18 sub esp, 0x18 -| 0x080483ea 83e4f0 and esp, 0xfffffff0 -| 0x080483ed b800000000 mov eax, 0 -| 0x080483f2 83c00f add eax, 0xf ; 15 -| 0x080483f5 83c00f add eax, 0xf ; 15 -| 0x080483f8 c1e804 shr eax, 4 -| 0x080483fb c1e004 shl eax, 4 -| 0x080483fe 29c4 sub esp, eax -| 0x08048400 c70424288504. mov dword [esp], str.IOLI_Crackme_Level_0x01 ; [0x8048528:4]=0x494c4f49 ; "IOLI Crackme Level 0x01\n" -| 0x08048407 e810ffffff call sym.imp.printf ; int printf(const char *format) -| 0x0804840c c70424418504. mov dword [esp], str.Password: ; [0x8048541:4]=0x73736150 ; "Password: " -| 0x08048413 e804ffffff call sym.imp.printf ; int printf(const char *format) -| 0x08048418 8d45fc lea eax, [var_4h] -| 0x0804841b 89442404 mov dword [var_sp_4h], eax -| 0x0804841f c704244c8504. mov dword [esp], 0x804854c ; [0x804854c:4]=0x49006425 -| 0x08048426 e8e1feffff call sym.imp.scanf ; int scanf(const char *format) -| 0x0804842b 817dfc9a1400. cmp dword [var_4h], 0x149a -| ,=< 0x08048432 740e je 0x8048442 -| | 0x08048434 c704244f8504. mov dword [esp], str.Invalid_Password ; [0x804854f:4]=0x61766e49 ; "Invalid Password!\n" -| | 0x0804843b e8dcfeffff call sym.imp.printf ; int printf(const char *format) -| ,==< 0x08048440 eb0c jmp 0x804844e -| |`-> 0x08048442 c70424628504. 
mov dword [esp], str.Password_OK_: ; [0x8048562:4]=0x73736150 ; "Password OK :)\n" -| | 0x08048449 e8cefeffff call sym.imp.printf ; int printf(const char *format) +/ int main(int argc, char **argv, char **envp); +| ; var int32_t var_18h @ stack - 0x18 +| ; var int32_t var_8h @ stack - 0x8 +| 0x080483e4 push ebp +| 0x080483e5 mov ebp, esp +| 0x080483e7 sub esp, 0x18 +| 0x080483ea and esp, 0xfffffff0 +| 0x080483ed mov eax, 0 +| 0x080483f2 add eax, 0xf ; 15 +| 0x080483f5 add eax, 0xf ; 15 +| 0x080483f8 shr eax, 4 +| 0x080483fb shl eax, 4 +| 0x080483fe sub esp, eax +| 0x08048400 mov dword [esp], str.IOLI_Crackme_Level_0x01 ; [0x8048528:4]=0x494c4f49 ; "IOLI Crackme Level 0x01\n" +| 0x08048407 call sym.imp.printf ; sym.imp.printf ; int printf(const char *format) +| 0x0804840c mov dword [esp], str.Password: ; [0x8048541:4]=0x73736150 ; "Password: " +| 0x08048413 call sym.imp.printf ; sym.imp.printf ; int printf(const char *format) +| 0x08048418 lea eax, [var_8h] +| 0x0804841b mov dword [var_18h], eax +| 0x0804841f mov dword [esp], 0x804854c ; [0x804854c:4]=0x49006425 +| 0x08048426 call sym.imp.scanf ; sym.imp.scanf ; int scanf(const char *format) +| 0x0804842b cmp dword [var_8h], 0x149a +| ,=< 0x08048432 je 0x8048442 +| | 0x08048434 mov dword [esp], str.Invalid_Password ; [0x804854f:4]=0x61766e49 ; "Invalid Password!\n" +| | 0x0804843b call sym.imp.printf ; sym.imp.printf ; int printf(const char *format) +| ,==< 0x08048440 jmp 0x804844e +| |`-> 0x08048442 mov dword [esp], str.Password_OK_: ; [0x8048562:4]=0x73736150 ; "Password OK :)\n" +| | 0x08048449 call sym.imp.printf ; sym.imp.printf ; int printf(const char *format) | | ; CODE XREF from main @ 0x8048440 -| `--> 0x0804844e b800000000 mov eax, 0 -| 0x08048453 c9 leave -\ 0x08048454 c3 ret +| `--> 0x0804844e mov eax, 0 +| 0x08048453 leave +\ 0x08048454 ret ``` -"aa" tells rizin to analyze the whole binary, which gets you symbol names, among things. 
+"aa" tells Rizin to analyze the whole binary, which gets you symbol names, among things. -"pdf" stands for +"pdf" stands for **p**rint, **d**isassemble, **f**unction. -* Print - -* Disassemble - -* Function - -This will print the disassembly of the main function, or the `main()` that everyone knows. You can see several things as well: weird names, arrows, etc. +This will print the disassembly of the main function, or the `main()` that everyone knows. You can see several +things as well: weird names, arrows, etc. * "imp." stands for imports. Those are imported symbols, like printf() - * "str." stands for strings. Those are strings (obviously). -If you look carefully, you'll see a `cmp` instruction, with a constant, 0x149a. `cmp` is an x86 compare instruction, and the 0x in front of it specifies it is in base 16, or hex (hexadecimal). +If you look carefully, you'll see a `cmp` instruction, with a constant, 0x149a. `cmp` is an x86 compare instruction, +and the 0x in front of it specifies it is in base 16, or hex (hexadecimal). ``` -0x0804842b 817dfc9a140. cmp dword [ebp + 0xfffffffc], 0x149a +│ 0x0804842b cmp dword [var_8h], 0x149a ``` You can use rizin's `%` command to display 0x149a in another numeric base. @@ -114,7 +107,9 @@ Password: 5274 Password OK :) ``` -Bingo, the password was 5274. In this case, the password function at 0x0804842b was comparing the input against the value, 0x149a in hex. Since user input is usually decimal, it was a safe bet that the input was intended to be in decimal, or 5274. Now, since we're hackers, and curiosity drives us, let's see what happens when we input in hex. +Bingo, the password was 5274. In this case, the password function at 0x0804842b was comparing the input against +the value, 0x149a in hex. Since user input is usually decimal, it was a safe bet that the input was intended to be +in decimal, or 5274. Now, since we're hackers, and curiosity drives us, let's see what happens when we input in hex. 
``` $ ./crackme0x01 @@ -123,6 +118,7 @@ Password: 0x149a Invalid Password! ``` -It was worth a shot, but it doesn't work. That's because `scanf()` will take the 0 in 0x149a to be a zero, rather than accepting the input as actually being the hex value. +It was worth a shot, but it doesn't work. That's because `scanf()` will take the 0 in 0x149a to be a zero, +rather than accepting the input as actually being the hex value. And this concludes IOLI 0x01. diff --git a/src/crackmes/ioli/ioli_0x02.md b/src/crackmes/ioli/ioli_0x02.md index f875f88f..2d7d5904 100644 --- a/src/crackmes/ioli/ioli_0x02.md +++ b/src/crackmes/ioli/ioli_0x02.md @@ -1,5 +1,4 @@ -IOLI 0x02 -========= +# IOLI 0x02 This is the third one. @@ -11,110 +10,122 @@ Invalid Password! ``` Firstly, let's check it with rz-bin. + ``` $ rz-bin -z ./crackme0x02 [Strings] -nth paddr vaddr len size section type string -――――――――――――――――――――――――――――――――――――――――――――――――――――――― +nth paddr vaddr len size section type string +--------------------------------------------------------------------------- 0 0x00000548 0x08048548 24 25 .rodata ascii IOLI Crackme Level 0x02\n -1 0x00000561 0x08048561 10 11 .rodata ascii Password: +1 0x00000561 0x08048561 10 11 .rodata ascii Password: 2 0x0000056f 0x0804856f 15 16 .rodata ascii Password OK :)\n 3 0x0000057f 0x0804857f 18 19 .rodata ascii Invalid Password!\n ``` Similar to 0x01, there's no explicit password string here. So, it's time to analyze it with Rizin. + ``` +$ rizin ./crackme0x02 [0x08048330]> aa [x] Analyze all flags starting with sym. 
and entry0 (aa) -[0x08048330]> pdf@main +[0x08048330]> pdf @ main ; DATA XREF from entry0 @ 0x8048347 -/ 144: int main (int argc, char **argv, char **envp); -| ; var int32_t var_ch @ ebp-0xc -| ; var int32_t var_8h @ ebp-0x8 -| ; var int32_t var_4h @ ebp-0x4 -| ; var int32_t var_sp_4h @ esp+0x4 -| 0x080483e4 55 push ebp -| 0x080483e5 89e5 mov ebp, esp -| 0x080483e7 83ec18 sub esp, 0x18 -| 0x080483ea 83e4f0 and esp, 0xfffffff0 -| 0x080483ed b800000000 mov eax, 0 -| 0x080483f2 83c00f add eax, 0xf ; 15 -| 0x080483f5 83c00f add eax, 0xf ; 15 -| 0x080483f8 c1e804 shr eax, 4 -| 0x080483fb c1e004 shl eax, 4 -| 0x080483fe 29c4 sub esp, eax -| 0x08048400 c70424488504. mov dword [esp], str.IOLI_Crackme_Level_0x02 ; [0x8048548:4]=0x494c4f49 ; "IOLI Crackme Level 0x02\n" -| 0x08048407 e810ffffff call sym.imp.printf ; int printf(const char *format) -| 0x0804840c c70424618504. mov dword [esp], str.Password: ; [0x8048561:4]=0x73736150 ; "Password: " -| 0x08048413 e804ffffff call sym.imp.printf ; int printf(const char *format) -| 0x08048418 8d45fc lea eax, [var_4h] -| 0x0804841b 89442404 mov dword [var_sp_4h], eax -| 0x0804841f c704246c8504. mov dword [esp], 0x804856c ; [0x804856c:4]=0x50006425 -| 0x08048426 e8e1feffff call sym.imp.scanf ; int scanf(const char *format) -| 0x0804842b c745f85a0000. mov dword [var_8h], 0x5a ; 'Z' ; 90 -| 0x08048432 c745f4ec0100. mov dword [var_ch], 0x1ec ; 492 -| 0x08048439 8b55f4 mov edx, dword [var_ch] -| 0x0804843c 8d45f8 lea eax, [var_8h] -| 0x0804843f 0110 add dword [eax], edx -| 0x08048441 8b45f8 mov eax, dword [var_8h] -| 0x08048444 0faf45f8 imul eax, dword [var_8h] -| 0x08048448 8945f4 mov dword [var_ch], eax -| 0x0804844b 8b45fc mov eax, dword [var_4h] -| 0x0804844e 3b45f4 cmp eax, dword [var_ch] -| ,=< 0x08048451 750e jne 0x8048461 -| | 0x08048453 c704246f8504. 
mov dword [esp], str.Password_OK_: ; [0x804856f:4]=0x73736150 ; "Password OK :)\n" -| | 0x0804845a e8bdfeffff call sym.imp.printf ; int printf(const char *format) -| ,==< 0x0804845f eb0c jmp 0x804846d -| |`-> 0x08048461 c704247f8504. mov dword [esp], str.Invalid_Password ; [0x804857f:4]=0x61766e49 ; "Invalid Password!\n" -| | 0x08048468 e8affeffff call sym.imp.printf ; int printf(const char *format) +/ int main(int argc, char **argv, char **envp); +| ; var int32_t var_18h @ stack - 0x18 +| ; var int32_t var_10h @ stack - 0x10 +| ; var int32_t var_ch @ stack - 0xc +| ; var int32_t var_8h @ stack - 0x8 +| 0x080483e4 push ebp +| 0x080483e5 mov ebp, esp +| 0x080483e7 sub esp, 0x18 +| 0x080483ea and esp, 0xfffffff0 +| 0x080483ed mov eax, 0 +| 0x080483f2 add eax, 0xf ; 15 +| 0x080483f5 add eax, 0xf ; 15 +| 0x080483f8 shr eax, 4 +| 0x080483fb shl eax, 4 +| 0x080483fe sub esp, eax +| 0x08048400 mov dword [esp], str.IOLI_Crackme_Level_0x02 ; [0x8048548:4]=0x494c4f49 ; "IOLI Crackme Level 0x02\n" +| 0x08048407 call sym.imp.printf ; sym.imp.printf ; int printf(const char *format) +| 0x0804840c mov dword [esp], str.Password: ; [0x8048561:4]=0x73736150 ; "Password: " +| 0x08048413 call sym.imp.printf ; sym.imp.printf ; int printf(const char *format) +| 0x08048418 lea eax, [var_8h] +| 0x0804841b mov dword [var_18h], eax +| 0x0804841f mov dword [esp], 0x804856c ; [0x804856c:4]=0x50006425 +| 0x08048426 call sym.imp.scanf ; sym.imp.scanf ; int scanf(const char *format) +| 0x0804842b mov dword [var_ch], 0x5a ; 'Z' ; 90 +| 0x08048432 mov dword [var_10h], 0x1ec ; 492 +| 0x08048439 mov edx, dword [var_10h] +| 0x0804843c lea eax, [var_ch] +| 0x0804843f add dword [eax], edx +| 0x08048441 mov eax, dword [var_ch] +| 0x08048444 imul eax, dword [var_ch] +| 0x08048448 mov dword [var_10h], eax +| 0x0804844b mov eax, dword [var_8h] +| 0x0804844e cmp eax, dword [var_10h] +| ,=< 0x08048451 jne 0x8048461 +| | 0x08048453 mov dword [esp], str.Password_OK_: ; [0x804856f:4]=0x73736150 ; "Password OK 
:)\n" +| | 0x0804845a call sym.imp.printf ; sym.imp.printf ; int printf(const char *format) +| ,==< 0x0804845f jmp 0x804846d +| |`-> 0x08048461 mov dword [esp], str.Invalid_Password ; [0x804857f:4]=0x61766e49 ; "Invalid Password!\n" +| | 0x08048468 call sym.imp.printf ; sym.imp.printf ; int printf(const char *format) | | ; CODE XREF from main @ 0x804845f -| `--> 0x0804846d b800000000 mov eax, 0 -| 0x08048472 c9 leave -\ 0x08048473 c3 ret - +| `--> 0x0804846d mov eax, 0 +| 0x08048472 leave +\ 0x08048473 ret ``` -With the experience of solving crackme0x01, we can first locate the position of `cmp` instruction by using this simple oneliner: +With the experience of solving crackme0x01, we can first locate the position of `cmp` instruction by using +this simple oneliner: + ``` -[0x08048330]> pdf@main~cmp -| 0x0804844e 3b45f4 cmp eax, dword [var_ch] +[0x08048330]> pdf @ main~cmp +│ 0x0804844e cmp eax, dword [var_10h] ``` -Unfortunately, the variable compared to `eax` is stored in the stack. We can't check the value of this variable directly. It's a common case in reverse engineering that we have to derive the value of the variable from the previous sequence. As the amount of code is relatively small, it can be easily done. +Unfortunately, the variable compared to `eax` is stored in the stack. We can't check the value of this variable +directly. It's a common case in reverse engineering that we have to derive the value of the variable from +the previous sequence. As the amount of code is relatively small, it can be easily done. for example: + ``` -| 0x080483ed b800000000 mov eax, 0 -| 0x080483f2 83c00f add eax, 0xf ; 15 -| 0x080483f5 83c00f add eax, 0xf ; 15 -| 0x080483f8 c1e804 shr eax, 4 -| 0x080483fb c1e004 shl eax, 4 -| 0x080483fe 29c4 sub esp, eax +│ 0x080483ed mov eax, 0 +│ 0x080483f2 add eax, 0xf ; 15 +│ 0x080483f5 add eax, 0xf ; 15 +│ 0x080483f8 shr eax, 4 +│ 0x080483fb shl eax, 4 +│ 0x080483fe sub esp, eax ``` We can easily get the value of `eax`. It's 16. 
-Directly looking at the disassembly gets hard when the scale of program grows. Rizin's flagship decompiler [rz-ghidra](https://github.com/rizinorg/rz-ghidra) might be of help, here. You can install it easily: +Directly looking at the disassembly gets hard when the scale of program grows. Rizin's flagship decompiler +[rz-ghidra](https://github.com/rizinorg/rz-ghidra) might be of help, here. You can install it easily: + ``` rz-pm -i rz-ghidra ``` Decompile `main()` with the following command (like `F5` in IDA): -```C -[0x08048330]> pdg + +``` +[0x080483e4]> pdg @ main + undefined4 main(void) { - uint32_t var_ch; - undefined4 var_8h; - int32_t var_4h; + int32_t var_18h; + int32_t var_10h; + int32_t var_ch; + int32_t var_8h; - printf("IOLI Crackme Level 0x02\n"); - printf("Password: "); - scanf(0x804856c, &var_4h); - if (var_4h == 0x52b24) { - printf("Password OK :)\n"); + sym.imp.printf("IOLI Crackme Level 0x02\n"); + sym.imp.printf("Password: "); + sym.imp.scanf(0x804856c, &var_8h); + if (var_8h == 0x52b24) { + sym.imp.printf("Password OK :)\n"); } else { - printf("Invalid Password!\n"); + sym.imp.printf("Invalid Password!\n"); } return 0; } @@ -123,14 +134,19 @@ undefined4 main(void) It's more human-readable now. To check the string in `0x804856c`, we can: * Seek * Print the string +* + ``` -[0x08048330]> s 0x804856c +[0x080483e4]> s 0x804856c [0x0804856c]> ps %d ``` -It's exactly the format string of `scanf()`. And rz-ghidra recognizes that the second argument (eax) is a pointer and it points to `var_4h`. Which means our input will be stored in `var_4h`. -We can easily write out the pseudo code here. +It's exactly the format string of `scanf()`. And rz-ghidra recognizes that the second argument (eax) is a pointer, +and it points to `var_8h`. Which means our input will be stored in `var_8h`. + +We can easily write out the pseudocode here. 
+ ```C var_ch = (var_8h + var_ch)^2; if (var_ch == our_input) diff --git a/src/crackmes/ioli/ioli_0x03.md b/src/crackmes/ioli/ioli_0x03.md index 97bf19c1..092c026a 100644 --- a/src/crackmes/ioli/ioli_0x03.md +++ b/src/crackmes/ioli/ioli_0x03.md @@ -1,5 +1,4 @@ -IOLI 0x03 -========= +# IOLI 0x03 This is the fourth crackme. @@ -21,7 +20,6 @@ nth paddr vaddr len size section type string 1 0x000005fe 0x080485fe 17 18 .rodata ascii Sdvvzrug#RN$$$#=, 2 0x00000610 0x08048610 24 25 .rodata ascii IOLI Crackme Level 0x03\n 3 0x00000629 0x08048629 10 11 .rodata ascii Password: - ``` Note that the 'Invalid Password!' and the 'Password OK :)' strings have been seemingly replaced by random @@ -29,7 +27,8 @@ gibberish. Let's analyze. -```C +``` +$ rizin ./crackme0x03 [0x08048360]> aaa [0x08048360]> pdg @ main @@ -48,8 +47,8 @@ undefined4 main(void) } ``` -This looks quite straightforward, `var_8h` is the result of `scanf` which the function `sym.test(var_8h, 0x52b24)` apparently -compares to the value `0x52b24`. +This looks quite straightforward, `var_8h` is the result of `scanf` which the function `sym.test(var_8h, 0x52b24)` +apparently compares to the value `0x52b24`. And indeed entering the decimal value of `0x52b24` (338724) gives us a pass. @@ -75,12 +74,13 @@ void sym.test(int32_t arg_4h, unsigned long arg_8h) return; } ``` -It's a two path conditional jump which compares two parameters and then does a shift. We can guess that `shift()` is most likely -some sort of decoding step of the seemingly random strings (shift cipher, e.g. Caesar cipher). + +It's a two path conditional jump which compares two parameters and then does a shift. We can guess that `shift()` +is most likely some sort of decoding step of the seemingly random strings (shift cipher, e.g. Caesar cipher). To confirm our suspicions let's analyze `sym.shift()`. 
-```C +``` [0x08048360]> pdg @ sym.shift // WARNING: Variable defined which should be unmapped: var_98h @@ -129,7 +129,7 @@ We can see that each character in `str` is subtracted by 3 to produce the final With this knowledge we can take a shot at decoding the strings. We can use the `pos` command to apply the subtraction needed for decoding and printing the result. -```bash +```shell $ rizin ./crackme0x03 [0x08048360]> aaa [0x08048360]> fs strings @@ -148,27 +148,29 @@ $ rizin ./crackme0x03 0x0804860e 29fd ). ``` -However, some functions may not be as easy to understand as this one, in which case it may be useful to be able to run the code. -Rizin provides us two ways of doing this: by using the debugger, or by emulation (using ESIL). +However, some functions may not be as easy to understand as this one, in which case it may be useful to be able to run +the code. Rizin provides us two ways of doing this: by using the debugger, or by emulation (using ESIL). -Let's first see how we can achieve this using the debugger. We will be wanting to pass the encoded strings to `shift()`. We know -`shift` takes one parameter `s`, which is an address to a (null terminated) string. We can see where on the stack local variables and -arguments are using the `afvl` command. +Let's first see how we can achieve this using the debugger. We will be wanting to pass the encoded strings +to `shift()`. We know `shift` takes one parameter `s`, which is an address to a (null terminated) string. +We can see where on the stack local variables and arguments are using the `afvl` command. 
-```bash +```shell $ rizin -d ./crackme0x03 -[0xec0ff970]> aa -[0xec0ff970]> afvl @ sym.shift +[0xf7f04630]> aa +[0xf7f04630]> afvl @ sym.shift var int32_t var_98h @ stack - 0x98 -var unsigned long var_80h @ stack - 0x80 +var int32_t var_80h @ stack - 0x80 var int32_t var_7ch @ stack - 0x7c -arg const char *s @ stack + 0x4 +arg int32_t arg_4h @ stack + 0x4 ``` We can see that `s` starts at a 4 byte offset from the stack pointer. -```bash -[0xec0ff970]> dcu main # run until program start +```shell +[0xf7f04630]> dcu main # run until program start +Continue until 0x08048498 +hit breakpoint at: 0x8048498 [0x08048498]> *esp+4=str.Lqydolg_Sdvvzrug # 'push' address onto the stack (note the 4 byte offset) [0x08048498]> dr eip=sym.shift # set instruction pointer to start of shift() [0x08048498]> dcr # run shift() until it returns @@ -179,8 +181,8 @@ Invalid Password! Password OK!!! :) ``` -Emulation is a bit more tricky because we can't make external calls to functions like `strlen()` and `printf()`. So we have to manually -skip over them and set the registers accordingly. Below is an example. +Emulation is a bit more tricky because we can't make external calls to functions like `strlen()` and `printf()`. +So we have to manually skip over them and set the registers accordingly. Below is an example. ```bash [0x08048414]> s 0x08048445 # the 'sub al, 0x03' diff --git a/src/crackmes/ioli/ioli_0x04.md b/src/crackmes/ioli/ioli_0x04.md index 7d749e5c..36b1144e 100644 --- a/src/crackmes/ioli/ioli_0x04.md +++ b/src/crackmes/ioli/ioli_0x04.md @@ -1,5 +1,4 @@ -IOLI 0x04 -========= +# IOLI 0x04 This is the fifth crackme. @@ -17,7 +16,7 @@ nth paddr vaddr len size section type string Checking for strings we see that our old friends "Password OK!" and "Password Incorrect!" are back in their unobfuscated forms. 
-```c +``` $ rizin ./crackme0x04 [0x080483d0]> aaa [0x080483d0]> pdg @ main @@ -39,47 +38,46 @@ undefined4 main(void) This time though, `scanf` takes a *string* and passes it to a function called `check`. -```c +``` [0x080483d0]> pdg @ sym.check -// WARNING: Variable defined which should be unmapped: var_28h -// WARNING: Variable defined which should be unmapped: var_24h -// WARNING: [rz-ghidra] Detected overlap for variable var_11h -void sym.check(int32_t arg_4h) +// WARNING: Variable defined which should be unmapped: format +// WARNING: Variable defined which should be unmapped: args + +void sym.check(char *s) { uint32_t uVar1; - int32_t var_28h; - int32_t var_24h; - undefined var_11h; - int32_t var_10h; - int32_t var_ch; + char *format; + va_list args; + char *var_11h; + unsigned long var_ch; int32_t var_8h; var_ch = 0; - var_10h = 0; + stack0xfffffff0 = 0; while( true ) { - uVar1 = sym.imp.strlen(arg_4h); - if (uVar1 <= (uint32_t)var_10h) break; - var_11h = *(undefined *)(var_10h + arg_4h); - sym.imp.sscanf(&var_11h, 0x8048638, &var_8h); + uVar1 = sym.imp.strlen(s); + if (uVar1 <= stack0xfffffff0) break; + var_11h._0_1_ = s[stack0xfffffff0]; + sym.imp.sscanf(&var_11h, data.08048638, &var_8h); var_ch = var_ch + var_8h; if (var_ch == 0xf) { sym.imp.printf("Password OK!\n"); sym.imp.exit(0); } - var_10h = var_10h + 1; + unique0x00003f80 = stack0xfffffff0 + 1; } sym.imp.printf("Password Incorrect!\n"); return; } + [0x080483d0]> afvl @ sym.check -var int32_t var_28h @ stack - 0x28 -var int32_t var_24h @ stack - 0x24 -var int32_t var_11h @ stack - 0x11 -var int32_t var_10h @ stack - 0x10 -var int32_t var_ch @ stack - 0xc +var const char *format @ stack - 0x28 +var va_list args @ stack - 0x24 +var const char *var_11h @ stack - 0x11 +var unsigned long var_ch @ stack - 0xc var int32_t var_8h @ stack - 0x8 -arg int32_t arg_4h @ stack + 0x4 +arg const char *s @ stack + 0x4 [0x080483d0]> ps @ 0x8048638 @!2 %d ``` @@ -88,8 +86,8 @@ A few things to note: `sscanf` in the 
`while` loop takes an integer ("%d"), and `var_8h`, which is subsequently used to increment `var_ch`. As soon as `var_ch` equals 15 (0xf) we gain entry. -Other than that however it may not be very obvious at first glance what exactly is going on here. So let's start a debugging session -to execute the function. +Other than that however it may not be very obvious at first glance what exactly is going on here. So let's start +a debugging session to execute the function. ```bash $ rizin -d ./crackme0x04 @@ -102,9 +100,9 @@ We will want to pass our own strings to `check`, so let's allocate some memory a ```bash [0x08048509]> dm+ 512 @ -1 # Allocate 512 bytes at anywhere (-1) -ra0=0xf3643000 -[0x08048509]> wz "letmein" @ 0xf3643000 # Write null-terminated string to our allocated memory -[0x08048509]> *esp+4=0xf3643000 # store the address under `arg_4h` (stack + 0x04) +ra0=0xf7fbb000 +[0x08048509]> wz "letmein" @ 0xf7fbb000 # Write null-terminated string to our allocated memory +[0x08048509]> *esp+4=0xf7fbb000 # store the address under `arg_4h` (stack + 0x04) ``` The password check completes if `var_ch` equals 15 (0xf) so let's add a breakpoint that prints the @@ -117,13 +115,13 @@ try to find it using `pdf @ sym.check`. [0x08048508]> db @ 0x080484d6 # set breakpoint [0x08048508]> dbc 'pxw 1 @ esp-0xc' @ 0x080484d6 # execute command on break [0x08048508]> dcr # execute until return -0xffa833f0 0x00000004 . # l -0xffa833f0 0x00000008 . # e -0xffa833f0 0x0000000c . # t -0xffa833f0 0x00000010 . # m -0xffa833f0 0x00000014 . # e -0xffa833f0 0x00000018 . # i -0xffa833f0 0x0000001c . # n +0xffeefb00 0x000000ff . # l +0xffeefb00 0x000000fe . # e +0xffeefb00 0x000000fd . # t +0xffeefb00 0x000000fc . # m +0xffeefb00 0x000000fb . # e +0xffeefb00 0x000000fa . # i +0xffeefb00 0x000000f9 . # n Password Incorrect! ``` @@ -134,14 +132,14 @@ of `var_ch`. This was never overwritten because `sscanf` didn't encounter any nu So let's try giving it a number as input. 
```bash -[0x08048508]> wz "1234" @ 0xf3643010 -[0x08048508]> *esp+4=0xf3643010 +[0x08048508]> wz "1234" @ 0xf7fbb010 +[0x08048508]> *esp+4=0xf7fbb010 [0x08048508]> dr eip=sym.check [0x08048508]> dcr -0xffa833f0 0x00000001 . # 1 -0xffa833f0 0x00000003 . # 2 -0xffa833f0 0x00000006 . # 3 -0xffa833f0 0x0000000a . # 4 +0xffeefb00 0x00000001 . # 1 +0xffeefb00 0x00000003 . # 2 +0xffeefb00 0x00000006 . # 3 +0xffeefb00 0x0000000a . # 4 Password Incorrect! ``` diff --git a/src/crackmes/ioli/ioli_0x05.md b/src/crackmes/ioli/ioli_0x05.md index 746fe920..41f4ae3c 100644 --- a/src/crackmes/ioli/ioli_0x05.md +++ b/src/crackmes/ioli/ioli_0x05.md @@ -1,5 +1,4 @@ -IOLI 0x05 -========= +# IOLI 0x05 This is the sixth crackme. @@ -16,7 +15,8 @@ nth paddr vaddr len size section type string No interesting strings, so let's analyze. -```c +``` +$ rizin ./crackme0x05 [0x080483d0]> aa [0x080483d0]> pdg @ main @@ -33,6 +33,7 @@ undefined4 main(void) sym.check((int32_t)&var_7ch); return 0; } + [0x080483d0]> ps @ 0x80486b2 %s ``` @@ -72,6 +73,7 @@ void sym.check(int32_t arg_4h) sym.imp.printf("Password Incorrect!\n"); return; } + [0x080483d0]> ps @ 0x8048668 @! 2 %d ``` @@ -79,7 +81,7 @@ void sym.check(int32_t arg_4h) We can see that `check` is mostly the same, except that this time the digit sum has to equal 16 (0x10), after which a function named `parell` is called. -```c +``` [0x080483d0]> pdg @ sym.parell // WARNING: Variable defined which should be unmapped: var_18h @@ -98,6 +100,7 @@ void sym.parell(int32_t arg_4h) } return; } + [0x080483d0]> ps @ 0x8048668 @! 2 %d ``` diff --git a/src/crackmes/ioli/ioli_0x06.md b/src/crackmes/ioli/ioli_0x06.md index 7582395c..2c34d0f5 100644 --- a/src/crackmes/ioli/ioli_0x06.md +++ b/src/crackmes/ioli/ioli_0x06.md @@ -1,5 +1,4 @@ -IOLI 0x06 -========= +# IOLI 0x06 Onto the seventh crackme. @@ -26,7 +25,9 @@ Password Incorrect! No dice, so let's take a closer look. 
-```c +``` +$ rizin ./crackme0x06 +[0x08048400]> aa [0x08048400]> pdg @ main // WARNING: [rz-ghidra] Detected overlap for variable var_11h @@ -42,6 +43,7 @@ undefined4 main(undefined4 placeholder_0, undefined4 placeholder_1, char **envp) sym.check((int32_t)&var_7ch, (int32_t)envp); return 0; } + [0x08048400]> ps @ 0x8048787 %s [0x08048400]> afvl @ main @@ -52,7 +54,7 @@ arg char **envp @ stack + 0xc This looks the same as before, except the program's environment variables `envp` are passed to `check`. -```c +``` [0x08048400]> pdg @ sym.check // WARNING: Variable defined which should be unmapped: var_28h @@ -90,7 +92,7 @@ void sym.check(int32_t arg_4h, int32_t arg_8h) This looks mostly the same as well. If we follow `envp` (now named `arg_8h`) we can see it gets passed to `parell`. -```c +``` [0x08048400]> pdg @ sym.parell // WARNING: Variable defined which should be unmapped: var_18h @@ -121,7 +123,7 @@ void sym.parell(int32_t arg_4h, int32_t arg_8h) We can see that the parity check is still in place, except it's now in a loop that executes 10 times, but only if `dummy()` returns non-zero. -```c +``` [0x08048400]> pdg @ sym.dummy // WARNING: Variable defined which should be unmapped: var_18h @@ -149,16 +151,15 @@ undefined4 sym.dummy(undefined4 placeholder_0, int32_t arg_8h) ``` Living up to its name, `dummy` does not use its first parameter at all, only the second one is used which is the -`envp` parameter from `main`. Apparently some part of `envp` has to equal "LOL" (only the first 3 characters are used, note the '3' -in `strncmp`). +`envp` parameter from `main`. Apparently some part of `envp` has to equal "LOL" (only the first 3 characters are used, +note the '3' in `strncmp`). It will be easier to figure out how `dummy` works if we run the code, so let's use the debugger again! 
- -```bash +```shell $ rizin -d ./crackme0x06 -[0xe8570cd0]> aa -[0xe8570cd0]> dcu sym.dummy +[0xf7fb1630]> aa +[0xf7fb1630]> dcu sym.dummy Continue until 0x080484b4 IOLI Crackme Level 0x06 Password: 88 @@ -167,52 +168,52 @@ hit breakpoint at: 0x80484b4 Now we should be at the start of `dummy`, let's see where we can place a breakpoint. -```asm -[0x08048502]> pdf +``` +[0x080484b4]> pdf ; CALL XREF from sym.parell @ 0x8048547 -┌ sym.dummy(int32_t arg_8h); -│ ; var int32_t var_18h @ stack - 0x18 -│ ; var int32_t var_14h @ stack - 0x14 -│ ; var int32_t var_ch @ stack - 0xc -│ ; var int32_t var_8h @ stack - 0x8 -│ ; arg int32_t arg_8h @ stack + 0x8 -│ 0x080484b4 push ebp -│ 0x080484b5 mov ebp, esp -│ 0x080484b7 sub esp, 0x18 -│ 0x080484ba mov dword [var_8h], 0 -│ ┌─> 0x080484c1 mov eax, dword [var_8h] -│ ╎ 0x080484c4 lea edx, [eax*4] -│ ╎ 0x080484cb mov eax, dword [arg_8h] -│ ╎ 0x080484ce cmp dword [edx + eax], 0 -│ ┌──< 0x080484d2 je 0x804850e -│ │╎ 0x080484d4 mov eax, dword [var_8h] -│ │╎ 0x080484d7 lea ecx, [eax*4] -│ │╎ 0x080484de mov edx, dword [arg_8h] -│ │╎ 0x080484e1 lea eax, [var_8h] -│ │╎ 0x080484e4 inc dword [eax] -│ │╎ 0x080484e6 mov dword [var_14h], 3 -│ │╎ 0x080484ee mov dword [var_18h], 0x8048738 ; str.LOLO -│ │╎ ; [0x8048738:4]=0x4f4c4f4c ; "LOLO" -│ │╎ 0x080484f6 mov eax, dword [ecx + edx] -│ │╎ ;-- eip: -│ │╎ 0x080484f9 mov dword [esp], eax -│ │╎ 0x080484fc call sym.imp.strncmp ; sym.imp.strncmp ; int strncmp(const char *s1, const char *s2, size_t n) -│ │╎ 0x08048501 test eax, eax -│ │└─< 0x08048503 jne 0x80484c1 -│ │ 0x08048505 mov dword [var_ch], 1 -│ │┌─< 0x0804850c jmp 0x8048515 -│ └──> 0x0804850e mov dword [var_ch], 0 -│ │ ; CODE XREF from sym.dummy @ 0x804850c -│ └─> 0x08048515 mov eax, dword [var_ch] -│ 0x08048518 leave -└ 0x08048519 ret + ;-- eip: +/ sym.dummy(int32_t arg_8h); +| ; var int32_t var_18h @ stack - 0x18 +| ; var int32_t var_14h @ stack - 0x14 +| ; var int32_t var_ch @ stack - 0xc +| ; var int32_t var_8h @ stack - 0x8 +| ; arg 
int32_t arg_8h @ stack + 0x8 +| 0x080484b4 push ebp +| 0x080484b5 mov ebp, esp +| 0x080484b7 sub esp, 0x18 +| 0x080484ba mov dword [var_8h], 0 +| .-> 0x080484c1 mov eax, dword [var_8h] +| : 0x080484c4 lea edx, [eax*4] +| : 0x080484cb mov eax, dword [arg_8h] +| : 0x080484ce cmp dword [edx + eax], 0 +| ,==< 0x080484d2 je 0x804850e +| |: 0x080484d4 mov eax, dword [var_8h] +| |: 0x080484d7 lea ecx, [eax*4] +| |: 0x080484de mov edx, dword [arg_8h] +| |: 0x080484e1 lea eax, [var_8h] +| |: 0x080484e4 inc dword [eax] +| |: 0x080484e6 mov dword [var_14h], 3 +| |: 0x080484ee mov dword [var_18h], 0x8048738 ; str.LOLO +| |: ; [0x8048738:4]=0x4f4c4f4c ; "LOLO" +| |: 0x080484f6 mov eax, dword [ecx + edx] +| |: 0x080484f9 mov dword [esp], eax +| |: 0x080484fc call sym.imp.strncmp ; sym.imp.strncmp ; int strncmp(const char *s1, const char *s2, size_t n) +| |: 0x08048501 test eax, eax +| |`=< 0x08048503 jne 0x80484c1 +| | 0x08048505 mov dword [var_ch], 1 +| |,=< 0x0804850c jmp 0x8048515 +| `--> 0x0804850e mov dword [var_ch], 0 +| | ; CODE XREF from sym.dummy @ 0x804850c +| `-> 0x08048515 mov eax, dword [var_ch] +| 0x08048518 leave +\ 0x08048519 ret ``` The instruction at `0x080484f9` looks like a good spot. This is just before `strncmp` is called, so we can see what value is passed to it. -```bash +``` [0x08048502]> db @ 0x080484f9 [0x08048502]> dbc 'psi @r:eax' @ 0x080484f9 [0x08048502]> dcr diff --git a/src/crackmes/ioli/ioli_0x07.md b/src/crackmes/ioli/ioli_0x07.md index d15bc7b8..6616bbc3 100644 --- a/src/crackmes/ioli/ioli_0x07.md +++ b/src/crackmes/ioli/ioli_0x07.md @@ -1,24 +1,23 @@ -IOLI 0x07 -========= +# IOLI 0x07 Already onto the eighth crackme! 
-```bash +```shell $ rz-bin -z ./crackme0x07 [Strings] -nth paddr vaddr len size section type string -――――――――――――――――――――――――――――――――――――――――――――――――――――――― +nth paddr vaddr len size section type string +--------------------------------------------------------------------------- 0 0x000007a8 0x080487a8 4 5 .rodata ascii LOLO 1 0x000007ad 0x080487ad 20 21 .rodata ascii Password Incorrect!\n 2 0x000007c5 0x080487c5 13 14 .rodata ascii Password OK!\n 3 0x000007d3 0x080487d3 5 6 .rodata ascii wtf?\n 4 0x000007d9 0x080487d9 24 25 .rodata ascii IOLI Crackme Level 0x07\n -5 0x000007f2 0x080487f2 10 11 .rodata ascii Password: +5 0x000007f2 0x080487f2 10 11 .rodata ascii Password: ``` Doing our routine strings check we see another new contender, wtf? Literally. -```c +``` $ rizin ./crackme0x07 [0x08048400]> aa [0x08048400]> pdg @ main @@ -41,7 +40,7 @@ undefined4 main(undefined4 placeholder_0, undefined4 placeholder_1, char **envp) Upping the difficulty, `check` is no longer exported so it's now listed as `fcn.080485b9`. To make our lives a bit easier, let's set the name manually. -```c +``` [0x08048400]> afn check @ fcn.080485b9 [0x08048400]> pdg @ check @@ -88,12 +87,13 @@ void check(int32_t arg_4h, int32_t arg_8h) } ``` -This looks like the `check` we've seen in previous version except there is now a parity check slapped on the end of it where -the string "wtf?" is printed. +This looks like the `check` we've seen in previous version except there is now a parity check slapped on the end of it +where the string "wtf?" is printed. Before we can continue to the other functions, they have to be analyzed first. We can analyze all functions recursively -using `afr`. -```c +using `afr`. 
+ +``` [0x08048400]> afr @ check [0x08048400]> pdg @ check @@ -139,13 +139,14 @@ void check(int32_t arg_4h, int32_t arg_8h) return; } ``` + The reason we're doing it this way in this case, is because `aaa` will cause some critical information to be omitted: namely the code that prints `"wtf?"`, more on that later. -For now though let's first check out `fcn.08048542`. We can probably already guess its identity as the code structure remains largely unchanged -from the previous versions. But it can't hurt to do our due diligence. +For now though let's first check out `fcn.08048542`. We can probably already guess its identity as the code structure +remains largely unchanged from the previous versions. But it can't hurt to do our due diligence. -```c +``` [0x08048400]> pdg @ fcn.08048542 // WARNING: Variable defined which should be unmapped: var_18h @@ -176,10 +177,11 @@ void fcn.08048542(int32_t arg_4h, int32_t arg_8h) } ``` -That does indeed look like `parell` from the previous versions. And that must make `fcn.080484b4` `dummy`. But look, there's an extra `if` inside -the parity check! Apparently some global variable has to be set to `1` in order for the password to be valid. +That does indeed look like `parell` from the previous versions. And that must make `fcn.080484b4` `dummy`. But look, +there's an extra `if` inside the parity check! Apparently some global variable has to be set to `1` in order for +the password to be valid. -```c +``` [0x08048400]> pdg @ fcn.080484b4 // WARNING: Variable defined which should be unmapped: var_18h @@ -210,12 +212,12 @@ undefined4 fcn.080484b4(undefined4 placeholder_0, int32_t arg_8h) [0x08048400]> afn parell @ fcn.08048542 ``` -And this must be `dummy`... With an addition. Can you spot it? This is where that global variable that we saw earlier gets set! -On the line containing `*(undefined4 *)0x804a02c = 1;`, more specifically. +And this must be `dummy`... With an addition. Can you spot it? 
This is where that global variable that we saw earlier +gets set! On the line containing `*(undefined4 *)0x804a02c = 1;`, more specifically. But before we continue let's see if there are any other references to or from this global variable. -```c +``` [0x08048400]> axf @ 0x804a02c [0x08048400]> axt @ 0x804a02c dummy 0x8048505 [DATA] mov dword [0x804a02c], 1 @@ -224,7 +226,7 @@ parell 0x804858f [DATA] cmp dword [0x804a02c], 1 It doesn't appear to be the case, so let's go back to `check`. -```c +``` [0x08048400]> pdg @ check // WARNING: Variable defined which should be unmapped: var_28h @@ -272,11 +274,10 @@ void check(int32_t arg_4h, int32_t arg_8h) We still have one unidentified function left: `fcn.08048524`. - -```c +``` [0x08048400]> pdg @ fcn.08048524 -void fcn.08048524 noreturn (void) +void fcn.08048524(void) { sym.imp.printf("Password Incorrect!\n"); // WARNING: Subroutine does not return @@ -284,9 +285,10 @@ void fcn.08048524 noreturn (void) } ``` -This doesn't seem to do much, other than to print that the password is incorrect and exit. So let's call it `print_and_exit`. +This doesn't seem to do much, other than to print that the password is incorrect and exit. So let's call it +`print_and_exit`. -```c +``` [0x08048400]> afn print_and_exit @ fcn.08048524 [0x08048400]> pdg @ check @@ -333,9 +335,9 @@ void check(int32_t arg_4h, int32_t arg_8h) } ``` -Interestingly, `print_and_exit` is called unconditionally before the second parity check, meaning it is never executed under normal -circumstances. If we had used `aaa` to analyze this binary, Rizin would have noticed this and it would have simply omitted it from -the disassembly and decompilation outputs. +Interestingly, `print_and_exit` is called unconditionally before the second parity check, meaning it is never executed +under normal circumstances. If we had used `aaa` to analyze this binary, Rizin would have noticed this, and it would +have simply omitted it from the disassembly and decompilation outputs. 
If you happen to accidentally (or intentionally) run `aaa`, you can remove all function analysis using `af-*`, after which you can run `aa`, followed by `afr` where needed. @@ -348,13 +350,13 @@ With that being said, it doesn't seem like the password constraints have changed Before we close Rizin however let's save this as a project first, so we don't lose all our hard work naming the functions. -```c +``` [0x08048400]> Ps crackme0x07.rzdb ``` And as we concluded, the passwords from the previous version still work. -```bash +```shell $ LOL= ./crackme0x07 IOLI Crackme Level 0x07 Password: 88 @@ -366,19 +368,19 @@ Password: 12346 Password OK! ``` -## Wtf? +## WTF? -We could go to the next one. Technically we've solved this crackme. But we have some unfinished business: the `wtf?` string. Let's see if we can -find a way to reach the code that's supposed to write it to the console! +We could go to the next one. Technically we've solved this crackme. But we have some unfinished business: the `wtf?` +string. Let's see if we can find a way to reach the code that's supposed to write it to the console! -It's easy enough using the debugger: we can simply set the instruction pointer to some location after the `print_and_exit` function (remember -`dr eip=
<address>`). +It's easy enough using the debugger: we can simply set the instruction pointer to some location after +the `print_and_exit` function (remember `dr eip=<address>
`). -We can reopen the current file in debug mode using the `ood` command. We do need an environment variable set that starts with `LOL`, we -can achieve this using the `dor` command. And let's also set a breakpoint at the location `print_and_exit` is called so we can jump -over it manually. +We can reopen the current file in debug mode using the `ood` command. We do need an environment variable set that +starts with `LOL`, we can achieve this using the `dor` command. And let's also set a breakpoint at the location +`print_and_exit` is called so we can jump over it manually. -```bash +``` [0x08048400]> ood Process with PID 191704 started... [0xf173fcd0]> dor setenv=LOL=O @@ -391,43 +393,39 @@ Password: 2 hit breakpoint at: 0x804862a ``` -Now we should be at the instruction that reads `call print_and_exit` (confirm with `pd 1 @ eip`). Now we need to find the address of the -instruction that comes after this one and set the instruction pointer to equal this value. +Now we should be at the instruction that reads `call print_and_exit` (confirm with `pd 1 @ eip`). Now we need to find +the address of the instruction that comes after this one and set the instruction pointer to equal this value. -```bash -[0x0804862a]> pd 2 @ eip ``` -```asm +[0x0804862a]> pd 2 @ eip │ ;-- eip: │ 0x0804862a b call print_and_exit ; print_and_exit │ 0x0804862f mov eax, dword [arg_8h] -``` -```bash [0x0804862a]> dr eip=0x0804862f [0x0804862a]> pd 1 @ eip -``` -```asm │ ;-- eip: │ 0x0804862f mov eax, dword [arg_8h] ``` With the `print_and_exit` function skipped we can continue execution. -```bash +``` [0x0804862a]> dc wtf? (191704) Process exited with status=0x0 [0xf3608579]> doc # close the debugging session ``` -We've successfully triggered the `wtf?` code using the debugger. But that's no fun! Let's see if there is a way we can reach that -code (semi-)naturally. +We've successfully triggered the `wtf?` code using the debugger. But that's no fun! 
Let's see if there is a way we can +reach that code (semi-)naturally. -In order for the `print_and_exit` function to be called we have to fail `parell` or the digit sum `check`. Failing `parell` is tricky -because the same version has to succeed after `print_and_exit` in order for our desired string to be printed. So we'll have -to fail the digit sum check, which means making sure that our digit sum will not land on 16 during the computation. +In order for the `print_and_exit` function to be called we have to fail `parell` or the digit sum `check`. +Failing `parell` is tricky because the same version has to succeed after `print_and_exit` in order for our desired +string to be printed. So we'll have to fail the digit sum check, which means making sure that our digit sum will +not land on 16 during the computation. -Easy enough! The only problem we have is that `exit` stops the process... But what if we were to make our own version of `exit`? +Easy enough! The only problem we have is that `exit` stops the process... But what if we were to make our own version +of `exit`? ```c void exit(int status) { @@ -442,8 +440,9 @@ void exit(int status) { ``` This turns `exit` into something that, well, *doesn't* exit. `__builtin_return_address` is used to look two -call frames up for a return address (the return address of `print_and_exit`) and jumps to it. Let's save it to a file called `exit.c`. -Compile it to a shared library using `gcc -m32 -shared -o libexit.so exit.c` and then we can preload it using `LD_PRELOAD`. +call frames up for a return address (the return address of `print_and_exit`) and jumps to it. Let's save it to a file +called `exit.c`. Compile it to a shared library using `gcc -m32 -shared -o libexit.so exit.c` and then we can preload +it using `LD_PRELOAD`. 
```bash $ LD_PRELOAD=./libexit.so LOL= ./crackme0x07 diff --git a/src/crackmes/ioli/ioli_0x08.md b/src/crackmes/ioli/ioli_0x08.md index bb158402..facdb9df 100644 --- a/src/crackmes/ioli/ioli_0x08.md +++ b/src/crackmes/ioli/ioli_0x08.md @@ -1,5 +1,4 @@ -IOLI 0x08 -========= +# IOLI 0x08 Time for the ninth crackme. @@ -13,13 +12,13 @@ nth paddr vaddr len size section type string 2 0x000007c5 0x080487c5 13 14 .rodata ascii Password OK!\n 3 0x000007d3 0x080487d3 5 6 .rodata ascii wtf?\n 4 0x000007d9 0x080487d9 24 25 .rodata ascii IOLI Crackme Level 0x08\n -5 0x000007f2 0x080487f2 10 11 .rodata ascii Password: +5 0x000007f2 0x080487f2 10 11 .rodata ascii Password: ``` -It looks like no new strings have been added. Before we jump into analyzing however, let's first see which functions have changed from the -previous version. We can get a nice overview using `rz-diff`. +It looks like no new strings have been added. Before we jump into analyzing however, let's first see which functions +have changed from the previous version. We can get a nice overview using `rz-diff`. -```diff +``` $ rz-diff -t functions crackme0x07 crackme0x08 .--------------------------------------------------------------------------------------------------------------------------. | name0 | size0 | addr0 | type | similarity | addr1 | size1 | name1 | diff --git a/src/crackmes/ioli/ioli_0x09.md b/src/crackmes/ioli/ioli_0x09.md index 2b335a6f..956edccf 100644 --- a/src/crackmes/ioli/ioli_0x09.md +++ b/src/crackmes/ioli/ioli_0x09.md @@ -1,11 +1,10 @@ -IOLI 0x09 -========= +# IOLI 0x09 And that brings us onto the last crackme. We can also use `rz-diff` to check for string differences. -```diff +``` $ rz-diff -t strings crackme0x08 crackme0x09 --- crackme0x08 +++ ./crackme0x09 @@ -23,7 +22,7 @@ $ rz-diff -t strings crackme0x08 crackme0x09 The only change is the version info (from 0x08 to 0x09). So let's check for functions. 
-```diff +``` $ rz-diff -t functions crackme0x08 ./crackme0x09 .--------------------------------------------------------------------------------------------------------------------------. | name0 | size0 | addr0 | type | similarity | addr1 | size1 | name1 | @@ -52,7 +51,7 @@ $ rz-diff -t functions crackme0x08 ./crackme0x09 We can see that a few functions have been changed. So let's check it out! We can also see that this version strips the symbol names again, but that should be no problem. We can easily identify them using the functions diff. -```bash +```shell $ rizin ./crackme0x09 [0x08048420]> aa [0x08048420]> afr @ main # recursively analyze functions, starting from main @@ -60,8 +59,6 @@ $ rizin ./crackme0x09 [0x08048420]> afn parell @ fcn.08048589 [0x08048420]> afn che @ fcn.0804855d [0x08048420]> afn dummy @ fcn.080484d4 -``` -```c [0x08048420]> pdg @ main // WARNING: Variable defined which should be unmapped: var_8h @@ -90,14 +87,13 @@ Looking at the functions diff we can see that `fcn.08048766` is named `__i686.ge in position-independent code to get the addresses of global constants (like string constants). Let's see if we can find out to which strings these offsets resolve to, but let's first give this new function a name. - -```bash +``` [0x08048420]> afn sym.__i686.get_pc_thunk.bx @ fcn.08048766 ``` To compute the addresses we can use ESIL. But we need to initialize it first. -```bash +``` [0x08048420]> s main [0x080486ee]> aei [0x080486ee]> aeip @@ -132,8 +128,6 @@ what particular string was printed here. 
```bash [0x080486ef]> CC "IOLI Crackme Level 0x09" @ eip [0x080486ef]> pd 1 @ eip -``` -```asm │ ;-- eip: │ 0x08048722 call sym.imp.printf ; sym.imp.printf ; IOLI Crackme Level 0x09 ; int printf(const char *format) ``` diff --git a/src/debugger/apple.md b/src/debugger/apple.md index e1931d8f..5dedb724 100644 --- a/src/debugger/apple.md +++ b/src/debugger/apple.md @@ -1,8 +1,10 @@ # macOS/iOS ## Sign rizin binary + If you install the Rizin pkg file and try to use it to debug a binary, you will very likely get an error message such as: + ``` Child killed ptrace: Cannot attach: Invalid argument @@ -28,17 +30,17 @@ $ codesign --entitlements --force -s - $(which rizin) However, be aware that even with a signed rizin binary you cannot debug binaries signed by Apple. To bypass the problem you have a few options: + - Remove the certificate of the debuggee, by using `codesign --remove-signature ` or other alternatives like [unsign](https://github.com/steakknife/unsign). WARNING: this cannot be undone, so we suggest to make a copy of the original binary. - Disable SIP with `csrutil enable --without debug` in Recovery Mode. - -## Debugging on MacOS over SSH +## Debugging on macOS over SSH If you are trying to debug a program over SSH, you may experience failures -like rizin getting stuck while opening the file. This is because the OS is +like Rizin getting stuck while opening the file. This is because the OS is waiting for user authentication to allow debugging. However, since you are over SSH, the OS has no way of showing the permission window. diff --git a/src/debugger/files.md b/src/debugger/files.md index 171ff84f..90b8e363 100644 --- a/src/debugger/files.md +++ b/src/debugger/files.md @@ -1,7 +1,9 @@ # Files -The rizin debugger allows the user to list and manipulate the file descriptors from the target process. +The Rizin debugger allows the user to list and manipulate the file descriptors from the target process. 
-This is a useful feature, which is not found in other debuggers, the functionality is similar to the lsof command line tool, but have extra subcommands to change the seek, close or duplicate them. +This is a useful feature, which is not found in other debuggers. The functionality is similar to the `lsof` +command line tool, but has extra subcommands to change the seek, close or duplicate them. -So, at any time in the debugging session you can replace the stdio file descriptors to use network sockets created by rizin, or replace a network socket connection to hijack it. +So, at any time in the debugging session you can replace the stdio file descriptors to use network sockets created +by Rizin, or replace a network socket connection to hijack it. diff --git a/src/debugger/getting_started.md b/src/debugger/getting_started.md index 667a093d..dc53be5a 100644 --- a/src/debugger/getting_started.md +++ b/src/debugger/getting_started.md @@ -1,8 +1,9 @@ # Getting Started -## Small session in rizin debugger +## Small session in Rizin debugger -* `rizin -d /bin/ls`: Opens rizin with file `/bin/ls` in debugger mode using the rizin native debugger, but does not run the program. You’ll see a prompt (rizin) - all examples are from this prompt. +* `rizin -d /bin/ls`: Opens rizin with file `/bin/ls` in debugger mode using the rizin native debugger, + but does not run the program. You’ll see a prompt (rizin) - all examples are from this prompt. * `db @ flag`: place a breakpoint at flag, where flag can be either an address or a function name diff --git a/src/debugger/heap.md b/src/debugger/heap.md index 11436621..93e7b9ed 100644 --- a/src/debugger/heap.md +++ b/src/debugger/heap.md @@ -1,6 +1,7 @@ # Heap -rizin's `dm` subcommands can also display a map of the heap which is useful for those who are interested in inspecting the heap and its content.
Simply execute `dmh` to show a map of the heap: +rizin's `dm` subcommands can also display a map of the heap which is useful for those who are interested in inspecting +the heap and its content. Simply execute `dmh` to show a map of the heap: ``` [0x55c392ae1189]> dmh @@ -35,7 +36,9 @@ Heap Layout │ Top chunk @ 0x55c3934c9370 │ └───────────────────────────────┘ ``` + To get information about bins of the main arena use the `dmhd` command. + ``` [0x55c392ae1189]> dmhd Tcache bins in Main Arena @ 0x7f10940c1b80 @@ -65,26 +68,20 @@ Other heap commands can be found under `dmh`, check `dmh?` for the full list. ``` [0x00000000]> dmh? -Usage: dmh # Memory map heap -| dmh List the chunks inside the heap segment -| dmh @[malloc_state] List heap chunks of a particular arena -| dmha List all malloc_state instances in application -| dmhb @[malloc_state] Display all parsed Double linked list of main_arena's or a particular arena bins instance -| dmhb [bin_num|bin_num:malloc_state] Display parsed double linked list of bins instance from a particular arena -| dmhbg [bin_num] Display double linked list graph of main_arena's bin [Under development] -| dmhc @[chunk_addr] Display malloc_chunk struct for a given malloc chunk -| dmhd [tcache|unsorted|fast|small|large] Display description of bins in the main_arena -| dmhf @[malloc_state] Display all parsed fastbins of main_arena's or a particular arena fastbinY instance -| dmhf [fastbin_num|fastbin_num:malloc_state] Display parsed single linked list in fastbinY instance from a particular arena -| dmhg Display heap graph of heap segment -| dmhg [malloc_state] Display heap graph of a particular arena -| dmhi @[malloc_state] Display heap_info structure/structures for a given arena -| dmhj List the chunks inside the heap segment in JSON format -| dmhm List all elements of struct malloc_state of main thread (main_arena) -| dmhm @[malloc_state] List all malloc_state instance of a particular arena -| dmht Display all parsed thread cache bins of 
all arena's tcache instance -| dmhv @[malloc_state] List heap chunks of a particular arena along with hexdump of first 0x10 bytes -| dmh? Show map heap help +Usage: dmh[?] # Glibc heap commands +| dmh[j*l] [] # List heap chunks of an arena +| dmha # List all the arenas +| dmhb[?] [] # Display double linked list for bins in an arena. Use dmhbg command for + graphical representation. +| dmhc # Get info about heap chunk at current offset +| dmhd[j] [] # Display state of bins in an arena. can be tcache/fast/unsorted/small/large +| dmhf[?] [] # Display all parsed fastbins of main_arena's or a particular arena + fastbinY instance +| dmhg [] # Display heap graph of a particular arena +| dmhi [] # Display heap_info structure/structures for a given arena +| dmhm[*] [] # List all elements of struct malloc_state +| dmht # Display all parsed thread cache bins of all arena's tcache instance ``` + Rizin supports Glibc, Jemalloc < 5.0 and Windows heap. diff --git a/src/debugger/intro.md b/src/debugger/intro.md index 59ce77d8..941c5b87 100644 --- a/src/debugger/intro.md +++ b/src/debugger/intro.md @@ -1,17 +1,24 @@ # Debugger -Debuggers are implemented as IO plugins. Therefore, rizin can handle different URI types for spawning, attaching and controlling processes. The complete list of IO plugins can be viewed with `rizin -L`. Those that have "d" in the first column ("rwd") support debugging. For example: +Debuggers are implemented as IO plugins. Therefore, rizin can handle different URI types for spawning, +attaching and controlling processes. The complete list of IO plugins can be viewed with `rizin -L`. +Those that have "d" in the first column ("rwd") support debugging. For example: ``` r_d debug Debug a program or pid. dbg:///bin/ls, dbg://1388 (LGPL3) rwd gdb Attach to gdbserver, 'qemu -s', gdb://localhost:1234 (LGPL3) ``` -There are different backends for many target architectures and operating systems, e.g., GNU/Linux, Windows, MacOS X, (Net,Free,Open)BSD and Solaris. 
+There are different backends for many target architectures and operating systems, e.g., GNU/Linux, Windows, macOS, +Net/Free/OpenBSD and Solaris. -Process memory is treated as a plain file. All mapped memory pages of a debugged program and its libraries can be read and interpreted as code or data structures. +Process memory is treated as a plain file. All mapped memory pages of a debugged program and its libraries can be +read and interpreted as code or data structures. -Communication between rizin and the debugger IO layer is wrapped into `system()` calls, which accept a string as an argument, and executes it as a command. An answer is then buffered in the output console, its contents can be additionally processed by a script. Access to the IO system is achieved with `R!`. Most IO plugins provide help with `R!?` or `R!help`. For example: +Communication between Rizin and the debugger IO layer is wrapped into `system()` calls, which accept a string +as an argument and execute it as a command. The answer is then buffered in the output console, and its contents +can be additionally processed by a script. Access to the IO system is achieved with `R!`. Most IO plugins provide +help with `R!?` or `R!help`. For example: ``` $ rizin -d /bin/ls @@ -24,38 +31,49 @@ Usage: R!cmd args R!pid <#> - select new pid ``` -In general, debugger commands are portable between architectures and operating systems. Still, as rizin tries to support the same functionality for all target architectures and operating systems, certain things have to be handled separately. They include injecting shellcodes and handling exceptions. For example, in MIPS targets there is no hardware-supported single-stepping feature. In this case, rizin provides its own implementation for single-step by using a mix of code analysis and software breakpoints. +In general, debugger commands are portable between architectures and operating systems.
Still, as Rizin tries +to support the same functionality for all target architectures and operating systems, certain things have +to be handled separately. They include injecting shellcodes and handling exceptions. For example, in MIPS targets +there is no hardware-supported single-stepping feature. In this case, Rizin provides its own implementation +for single-step by using a mix of code analysis and software breakpoints. -To get basic help for the debugger, type 'd?': +To get basic help for the debugger, type `d?`: ``` -Usage: d # Debug commands -| db[?] Breakpoints commands -| dbt[?] Display backtrace based on dbg.btdepth and dbg.btalgo -| dc[?] Continue execution -| dd[?] File descriptors (!fd in r1) -| de[-sc] [perm] [rm] [e] Debug with ESIL (see de?) -| dg Generate a core-file (WIP) -| dH [handler] Transplant process to a new handler -| di[?] Show debugger backend information (See dh) -| dk[?] List, send, get, set, signal handlers of child -| dL[?] List or set debugger handler -| dm[?] Show memory maps -| do[?] Open process (reload, alias for 'oo') -| doo[args] Reopen in debug mode with args (alias for 'ood') -| doof[file] Reopen in debug mode from file (alias for 'oodf') -| doc Close debug session -| dp[?] List, attach to process or thread id -| dr[?] Cpu registers -| ds[?] Step, over, source line -| dt[?] Display instruction traces -| dw Block prompt until pid dies -| dx[?] Inject and run code on target process (See gs) +[0x000000000000]> d? +Usage: d # Debugger commands +| db[?] # Breakpoints commands +| dc[?] # Continue execution +| dd[-lsdrw] # Debug file descriptors commands +| de[lcs?] # Manage ESIL watchpoints +| dg [] # Generate core dump file +| do # Debug (re)open commands +| ds[?] # Debug step commands +| dt[?] # Trace commands +| di[j*q] # Debug information +| dk[lnNo] # Debug signals management +| dl[l] # Debug handler +| dm[?] # Memory map commands +| dp[?] # List or attach to process or thread +| dr[?] 
# CPU Registers +| dw [] # Block prompt until dies +| dW[i] # Windows process commands +| dx[aers] # Code injection commands ``` -To restart your debugging session, you can type `oo` or `oo+`, depending on desired behavior. +To restart your debugging session, you can use one of the `oo` commands, depending on the desired behavior: ``` -oo reopen current file (kill+fork in debugger) -oo+ reopen current file in read-write +[0x00000000]> oo? +Usage: oo[+bcdmn?] # Reopen current file +| oo [] # Reopen current file or file +| oo+ [] # Reopen current file or file in write mode +| oob [] # Reopen current file and reload binary information +| ooc # Reopen current file as if restarting rizin +| ood[fr] # Reopen current file in debug mode +| oom # Reopen curent file in malloc:// +| oon # Reopen curent file without loading binary information +| oon+ # Reopen curent file in write-mode without loading binary information +| oonn # Reopen curent file without loading binary information but with header flags +| oonn+ # Reopen curent file in write-mode without loading binary information but with header flags ``` diff --git a/src/debugger/memory_maps.md b/src/debugger/memory_maps.md index a29c5358..63b5e5b3 100644 --- a/src/debugger/memory_maps.md +++ b/src/debugger/memory_maps.md @@ -1,139 +1,175 @@ # Memory Maps -The ability to understand and manipulate the memory maps of a debugged program is important for many different Reverse Engineering tasks. rizin offers a rich set of commands to handle memory maps in the binary. This includes listing the memory maps of the currently debugged binary, removing memory maps, handling loaded libraries and more. +The ability to understand and manipulate the memory maps of a debugged program is important for many different +Reverse Engineering tasks. Rizin offers a rich set of commands to handle memory maps in the binary. +This includes listing the memory maps of the currently debugged binary, removing memory maps, +handling loaded libraries and more.
First, let's see the help message for `dm`, the command which is responsible for handling memory maps: ``` [0x55f2104cf620]> dm? -Usage: dm # Memory maps commands -| dm List memory maps of target process -| dm address size Allocate bytes at
(anywhere if address is -1) in child process -| dm= List memory maps of target process (ascii-art bars) -| dm. Show map name of current address -| dm* List memmaps in rizin commands -| dm- address Deallocate memory map of
-| dmd[a] [file] Dump current (all) debug map region to a file (from-to.dmp) (see Sd) -| dmh[?] Show map of heap -| dmi [addr|libname] [symname] List symbols of target lib -| dmi* [addr|libname] [symname] List symbols of target lib in rizin commands -| dmi. List closest symbol to the current address -| dmiv Show address of given symbol for given lib -| dmj List memmaps in JSON format -| dml Load contents of file into the current map region -| dmm[?][j*] List modules (libraries, binaries loaded in memory) -| dmp[?]
Change page at
with , protection (perm) -| dms[?] Take memory snapshot -| dms- Restore memory snapshot -| dmS [addr|libname] [sectname] List sections of target lib -| dmS* [addr|libname] [sectname] List sections of target lib in rizin commands -| dmL address size Allocate bytes at
and promote to huge page -``` - -In this chapter, we'll go over some of the most useful subcommands of `dm` using simple examples. For the following examples, we'll use a simple `helloworld` program for Linux but it'll be the same for every binary. +Usage: dm[?] # Memory map commands +| dm[j*qt] # List memory maps +| dm+ # Allocate bytes at current offset +| dm= # List memory maps of current process with ASCII art bars +| dm. # Show map name of current address +| dmm[j*.] # Module memory map commands +| dm- # Deallocate memory map at current offset +| dmd[aw] # Dump debug map regions to a file (from-to.dmp) +| dmh[?] # Glibc heap commands +| dmi[?] # List/Load symbols +| dml # Load contents of file into current map region +| dmp [] # Change page at current offset with , protection / Change dbg.map permissions + to +| dmL # Allocate bytes at current offset and promote to huge page +| dmS[*] [ []] # List sections of target lib +| dmw[jb?] # Windows heap commands +| dmx[?] # Jemalloc heap commands +``` + +In this chapter, we'll go over some of the most useful subcommands of `dm` using simple examples. +For the following examples, we'll use a simple +[hello_world](https://github.com/rizinorg/book/tree/master/examples/hello_world) program for Linux, +but it'll be the same for every binary. First things first - open a program in debugging mode: ``` -$ rizin -d helloworld -Process with PID 20304 started... -= attach 20304 20304 -bin.baddr 0x56136b475000 -Using 0x56136b475000 -asm.bits 64 -[0x7f133f022fb0]> +$ rizin -d hello_world +Process with PID 4760 started... +[0x7f12fa1debb0]> ``` -> Note that we passed "helloworld" to rizin without "./". rizin will try to find this program in the current directory and then in $PATH, even if no "./" is passed. This is contradictory with UNIX systems, but makes the behaviour consistent for windows users +> Note that we passed "hello_world" to rizin without "./".
rizin will try to find this program in the current +> directory and then in $PATH, even if no "./" is passed. This is contradictory with UNIX systems, +> but makes the behaviour consistent for Windows users Let's use `dm` to print the memory maps of the binary we've just opened: ``` [0x7f133f022fb0]> dm -0x0000563a0113a000 - usr 4K s r-x /tmp/helloworld /tmp/helloworld ; map.tmp_helloworld.r_x -0x0000563a0133a000 - usr 8K s rw- /tmp/helloworld /tmp/helloworld ; map.tmp_helloworld.rw -0x00007f133f022000 * usr 148K s r-x /usr/lib/ld-2.27.so /usr/lib/ld-2.27.so ; map.usr_lib_ld_2.27.so.r_x -0x00007f133f246000 - usr 8K s rw- /usr/lib/ld-2.27.so /usr/lib/ld-2.27.so ; map.usr_lib_ld_2.27.so.rw -0x00007f133f248000 - usr 4K s rw- unk0 unk0 ; map.unk0.rw -0x00007fffd25ce000 - usr 132K s rw- [stack] [stack] ; map.stack_.rw -0x00007fffd25f6000 - usr 12K s r-- [vvar] [vvar] ; map.vvar_.r -0x00007fffd25f9000 - usr 8K s r-x [vdso] [vdso] ; map.vdso_.r_x -0xffffffffff600000 - usr 4K s r-x [vsyscall] [vsyscall] ; map.vsyscall_.r_x +0x000055ca0f426000 - 0x000055ca0f427000 - usr 4K s r-- /tmp/hello_world /tmp/hello_world ; tmp_hello_world.r +0x000055ca0f427000 - 0x000055ca0f428000 - usr 4K s r-x /tmp/hello_world /tmp/hello_world ; tmp_hello_world.r_x +0x000055ca0f428000 - 0x000055ca0f429000 - usr 4K s r-- /tmp/hello_world /tmp/hello_world ; tmp_hello_world.r.55ca0f428000 +0x000055ca0f429000 - 0x000055ca0f42b000 - usr 8K s rw- /tmp/hello_world /tmp/hello_world ; tmp_hello_world.rw +0x00007f52c4ae0000 - 0x00007f52c4ae1000 - usr 4K s r-- /usr/lib64/ld-linux-x86-64.so.2 /usr/lib64/ld-linux-x86-64.so.2 ; usr_lib64_ld_linux_x86_64.so.2.r +0x00007f52c4ae1000 - 0x00007f52c4b06000 * usr 148K s r-x /usr/lib64/ld-linux-x86-64.so.2 /usr/lib64/ld-linux-x86-64.so.2 ; usr_lib64_ld_linux_x86_64.so.2.r_x +0x00007f52c4b06000 - 0x00007f52c4b11000 - usr 44K s r-- /usr/lib64/ld-linux-x86-64.so.2 /usr/lib64/ld-linux-x86-64.so.2 ; usr_lib64_ld_linux_x86_64.so.2.r.7f52c4b06000 +0x00007f52c4b11000 - 
0x00007f52c4b15000 - usr 16K s rw- /usr/lib64/ld-linux-x86-64.so.2 /usr/lib64/ld-linux-x86-64.so.2 ; usr_lib64_ld_linux_x86_64.so.2.rw +0x00007fff03836000 - 0x00007fff03858000 - usr 136K s rw- [stack] [stack] ; stack_.rw +0x00007fff038bc000 - 0x00007fff038c0000 - usr 16K s r-- [vvar] [vvar] ; vvar_.r +0x00007fff038c0000 - 0x00007fff038c2000 - usr 8K s r-x [vdso] [vdso] ; vdso_.r_x +0xffffffffff600000 - 0xffffffffff601000 - usr 4K s --x [vsyscall] [vsyscall] ; vsyscall_.__x +``` + +For those of you who prefer a more visual way, you can use `dm=` to see the memory maps using an ASCII-art bars. +This will be handy when you want to see how these maps are located in the memory. + +``` +[0x7f52c4afbbb0]> dm= +map 4K - 0x00007f52c4ae0000 |------------------------------| 0x00007f52c4ae1000 r-- /usr/lib64/ld-linux-x86-64.so.2 +map 148K * 0x00007f52c4ae1000 |------------------------------| 0x00007f52c4b06000 r-x /usr/lib64/ld-linux-x86-64.so.2 +map 44K - 0x00007f52c4b06000 |------------------------------| 0x00007f52c4b11000 r-- /usr/lib64/ld-linux-x86-64.so.2 +map 16K - 0x00007f52c4b11000 |------------------------------| 0x00007f52c4b15000 rw- /usr/lib64/ld-linux-x86-64.so.2 +map 4K - 0xffffffffff600000 |------------------------------| 0xffffffffff601000 --x [vsyscall] +map 136K - 0x00007fff03836000 |------------------------------| 0x00007fff03858000 rw- [stack] +map 16K - 0x00007fff038bc000 |------------------------------| 0x00007fff038c0000 r-- [vvar] +map 8K - 0x00007fff038c0000 |------------------------------| 0x00007fff038c2000 r-x [vdso] +map 4K - 0x000055ca0f426000 |#######-----------------------| 0x000055ca0f427000 r-- /tmp/hello_world +map 4K - 0x000055ca0f427000 |------#######-----------------| 0x000055ca0f428000 r-x /tmp/hello_world +map 4K - 0x000055ca0f428000 |------------#######-----------| 0x000055ca0f429000 r-- /tmp/hello_world +map 8K - 0x000055ca0f429000 |------------------############| 0x000055ca0f42b000 rw- /tmp/hello_world ``` -For those of you who 
prefer a more visual way, you can use `dm=` to see the memory maps using an ASCII-art bars. This will be handy when you want to see how these maps are located in the memory. - If you want to know the memory-map you are currently in, use `dm.`: ``` -[0x7f133f022fb0]> dm. -0x00007f947eed9000 # 0x00007f947eefe000 * usr 148K s r-x /usr/lib/ld-2.27.so /usr/lib/ld-2.27.so ; map.usr_lib_ld_2.27.so.r_x +[0x7f52c4afbbb0]> dm. +0x00007f52c4ae1000 - 0x00007f52c4b06000 * usr 148K s r-x /usr/lib64/ld-linux-x86-64.so.2 /usr/lib64/ld-linux-x86-64.so.2 ; usr_lib64_ld_linux_x86_64.so.2.r_x ``` -Using `dmm` we can "List modules (libraries, binaries loaded in memory)", this is quite a handy command to see which modules were loaded. +Using `dmm` we can "List modules (libraries, binaries loaded in memory)", this is quite a handy command to see +which modules were loaded. ``` -[0x7fa80a19dfb0]> dmm -0x55ca23a4a000 /tmp/helloworld -0x7fa80a19d000 /usr/lib/ld-2.27.so +[0x7f52c4afbbb0]> dmm +0x55ca0f426000 0x55ca0f427000 /tmp/hello_world +0x7f52c4ae0000 0x7f52c4ae1000 /usr/lib64/ld-linux-x86-64.so.2 ``` -> Note that the output of `dm` subcommands, and `dmm` specifically, might be different in various systems and different binaries. -We can see that along with our `helloworld` binary itself, another library was loaded which is `ld-2.27.so`. We don't see `libc` yet and this is because rizin breaks before `libc` is loaded to memory. Let's use `dcu` (**d**ebug **c**ontinue **u**ntil) to execute our program until the entry point of the program, which rizin flags as `entry0`. +> Note that the output of `dm` subcommands, and `dmm` specifically, might be different in various systems and +> different binaries. + +We can see that along with our `hello_world` binary itself, another library was loaded which is `ld-linux-x86-64.so.2`. +We don't see `libc` yet and this is because Rizin breaks before `libc` is loaded to memory. 
+Let's use `dcu` (**d**ebug **c**ontinue **u**ntil) to execute our program until the entry point of the program, +which Rizin flags as `entry0`: ``` -[0x7fa80a19dfb0]> dcu entry0 -Continue until 0x55ca23a4a520 using 1 bpsize -hit breakpoint at: 55ca23a4a518 -[0x55ca23a4a520]> dmm -0x55ca23a4a000 /tmp/helloworld -0x7fa809de1000 /usr/lib/libc-2.27.so -0x7fa80a19d000 /usr/lib/ld-2.27.so +[0x7f52c4afbbb0]> dcu entry0 +Continue until 0x55ca0f427100 +hit breakpoint at: 0x55ca0f427100 + +[0x55ca0f427100]> dmm +0x55ca0f426000 0x55ca0f427000 /tmp/hello_world +0x7f52c48c8000 0x7f52c48ec000 /usr/lib64/libc.so.6 +0x7f52c4ae0000 0x7f52c4ae1000 /usr/lib64/ld-linux-x86-64.so.2 ``` -Now we can see that `libc-2.27.so` was loaded as well, great! +Now we can see that `libc.so.6` was loaded as well, great! -Speaking of `libc`, a popular task for binary exploitation is to find the address of a specific symbol in a library. With this information in hand, you can build, for example, an exploit which uses ROP. This can be achieved using the `dmi` command. So if we want, for example, to find the address of [`system()`](http://man7.org/linux/man-pages/man3/system.3.html) in the loaded `libc`, we can simply execute the following command: +Speaking of `libc`, a popular task for binary exploitation is to find the address of a specific symbol in a library. +With this information in hand, you can build, for example, an exploit which uses ROP. This can be achieved using +the `dmi` command. 
So if we want, for example, to find the address +of [`system()`](http://man7.org/linux/man-pages/man3/system.3.html) in the loaded `libc`, +we can simply execute the following command: ``` -[0x55ca23a4a520]> dmi libc system -514 0x00000000 0x7fa809de1000 LOCAL FILE 0 system.c -515 0x00043750 0x7fa809e24750 LOCAL FUNC 1221 do_system -4468 0x001285a0 0x7fa809f095a0 LOCAL FUNC 100 svcerr_systemerr -5841 0x001285a0 0x7fa809f095a0 LOCAL FUNC 100 svcerr_systemerr -6427 0x00043d10 0x7fa809e24d10 WEAK FUNC 45 system -7094 0x00043d10 0x7fa809e24d10 GLBAL FUNC 45 system -7480 0x001285a0 0x7fa809f095a0 GLBAL FUNC 100 svcerr_systemerr +[0x55ca0f427100]> dmi libc system +[Symbols] +nth paddr vaddr bind type size lib name +--------------------------------------------------------- +1052 0x0004d2f0 0x7f52c49152f0 WEAK FUNC 45 system ``` -Similar to the `dm.` command, with `dmi.` you can see the closest symbol to the current address. +Similar to the `dm.` command, with `dmi.` you can see the closest symbol to the current address: + +``` +[0x55ca0f427100]> dmi. libc system +[Symbols] +nth paddr vaddr bind type size lib name +------------------------------------------------------ +20 ---------- 0x00004018 GLOBAL NOTYPE 0 _end +``` -Another useful command is to list the sections of a specific library. In the following example we'll list the sections of `ld-2.27.so`: +Another useful command is to list the sections of a specific library. In the following example we'll list +the sections of `ld-linux-x86-64.so.2`: ``` -[0x55a7ebf09520]> dmS ld-2.27 +[0x55ca0f427100]> dmS ld-linux-x86-64.so.2 [Sections] -00 0x00000000 0 0x00000000 0 ---- ld-2.27.so. 
-01 0x000001c8 36 0x4652d1c8 36 -r-- ld-2.27.so..note.gnu.build_id -02 0x000001f0 352 0x4652d1f0 352 -r-- ld-2.27.so..hash -03 0x00000350 412 0x4652d350 412 -r-- ld-2.27.so..gnu.hash -04 0x000004f0 816 0x4652d4f0 816 -r-- ld-2.27.so..dynsym -05 0x00000820 548 0x4652d820 548 -r-- ld-2.27.so..dynstr -06 0x00000a44 68 0x4652da44 68 -r-- ld-2.27.so..gnu.version -07 0x00000a88 164 0x4652da88 164 -r-- ld-2.27.so..gnu.version_d -08 0x00000b30 1152 0x4652db30 1152 -r-- ld-2.27.so..rela.dyn -09 0x00000fb0 11497 0x4652dfb0 11497 -r-x ld-2.27.so..text -10 0x0001d0e0 17760 0x4654a0e0 17760 -r-- ld-2.27.so..rodata -11 0x00021640 1716 0x4654e640 1716 -r-- ld-2.27.so..eh_frame_hdr -12 0x00021cf8 9876 0x4654ecf8 9876 -r-- ld-2.27.so..eh_frame -13 0x00024660 2020 0x46751660 2020 -rw- ld-2.27.so..data.rel.ro -14 0x00024e48 336 0x46751e48 336 -rw- ld-2.27.so..dynamic -15 0x00024f98 96 0x46751f98 96 -rw- ld-2.27.so..got -16 0x00025000 3960 0x46752000 3960 -rw- ld-2.27.so..data -17 0x00025f78 0 0x46752f80 376 -rw- ld-2.27.so..bss -18 0x00025f78 17 0x00000000 17 ---- ld-2.27.so..comment -19 0x00025fa0 63 0x00000000 63 ---- ld-2.27.so..gnu.warning.llseek -20 0x00025fe0 13272 0x00000000 13272 ---- ld-2.27.so..symtab -21 0x000293b8 7101 0x00000000 7101 ---- ld-2.27.so..strtab -22 0x0002af75 215 0x00000000 215 ---- ld-2.27.so..shstrtab +paddr size vaddr vsize align perm name type flags +-------------------------------------------------------------------------------------------------------------------- +0x00000000 0x0 ---------- 0x0 0x0 ---- ld-linux-x86-64.so.2. 
NULL +0x00000270 0x1e8 0x7f52c4ae0270 0x1e8 0x0 -r-- ld-linux-x86-64.so.2..gnu.hash GNU_HASH alloc +0x00000458 0x3c0 0x7f52c4ae0458 0x3c0 0x0 -r-- ld-linux-x86-64.so.2..dynsym DYNSYM alloc +0x00000818 0x2ca 0x7f52c4ae0818 0x2ca 0x0 -r-- ld-linux-x86-64.so.2..dynstr STRTAB alloc +0x00000ae2 0x50 0x7f52c4ae0ae2 0x50 0x0 -r-- ld-linux-x86-64.so.2..gnu.version VERSYM alloc +0x00000b38 0xec 0x7f52c4ae0b38 0xec 0x0 -r-- ld-linux-x86-64.so.2..gnu.version_d VERDEF alloc +0x00000c28 0x18 0x7f52c4ae0c28 0x18 0x0 -r-- ld-linux-x86-64.so.2..rela.dyn RELA alloc +0x00000c40 0x18 0x7f52c4ae0c40 0x18 0x0 -r-- ld-linux-x86-64.so.2..relr.dyn NUM alloc +0x00001000 0x24ebb 0x7f52c4ae1000 0x24ebb 0x0 -r-x ld-linux-x86-64.so.2..text PROGBITS alloc,execute +0x00026000 0x64e8 0x7f52c4b06000 0x64e8 0x0 -r-- ld-linux-x86-64.so.2..rodata PROGBITS alloc +0x0002c4e8 0x9a4 0x7f52c4b0c4e8 0x9a4 0x0 -r-- ld-linux-x86-64.so.2..eh_frame_hdr PROGBITS alloc +0x0002ce90 0x36f8 0x7f52c4b0ce90 0x36f8 0x0 -r-- ld-linux-x86-64.so.2..eh_frame PROGBITS alloc +0x00030588 0x40 0x7f52c4b10588 0x40 0x0 -r-- ld-linux-x86-64.so.2..note.gnu.property NOTE alloc +0x000312e0 0x1ba0 0x7f52c4b112e0 0x1ba0 0x0 -rw- ld-linux-x86-64.so.2..data.rel.ro PROGBITS write,alloc +0x00032e80 0x170 0x7f52c4b12e80 0x170 0x0 -rw- ld-linux-x86-64.so.2..dynamic DYNAMIC write,alloc +0x00033000 0x1104 0x7f52c4b13000 0x1104 0x0 -rw- ld-linux-x86-64.so.2..data PROGBITS write,alloc +0x00034104 0x0 0x7f52c4b14110 0x1d0 0x0 -rw- ld-linux-x86-64.so.2..bss NOBITS write,alloc +0x00034104 0x33 ---------- 0x33 0x0 ---- ld-linux-x86-64.so.2..comment PROGBITS merge,strings +0x00034138 0x40f8 ---------- 0x40f8 0x0 ---- ld-linux-x86-64.so.2..symtab SYMTAB +0x00038230 0x23f4 ---------- 0x23f4 0x0 ---- ld-linux-x86-64.so.2..strtab STRTAB +0x0003a624 0xc8 ---------- 0xc8 0x0 ---- ld-linux-x86-64.so.2..shstrtab STRTAB ``` diff --git a/src/debugger/migration.md b/src/debugger/migration.md index b0c5f407..8bc0a4fc 100644 --- a/src/debugger/migration.md +++ 
b/src/debugger/migration.md @@ -1,10 +1,10 @@ -# Migration from ida, GDB or WinDBG +# Migration from IDA, GDB or WinDBG -## How to run the program using the debugger ## +## How to run the program using the debugger `rizin -d /bin/ls` - start in debugger mode => [[video](http://asciinema.org/a/12022)] -## How do I attach/detach to running process ? (gdb -p) ## +## How do I attach/detach to a running process (gdb -p)? `rizin -d ` - attach to process @@ -14,11 +14,11 @@ `rizin -D gdb gdb://localhost:1234` - attach to gdbserver -## How to set args/environment variable/load a specific libraries for the debugging session of rizin +## How to set args/environment variables/load specific libraries for the debugging session of rizin? Use `rz-run` (`libpath=$PWD:/tmp/lib`, `arg2=hello`, `setenv=FOO=BAR` ...) see `rz-run -h` / `man rz-run` -## How to script rizin ? +## How to script rizin? `rizin -i ...` - run a script **after** loading the file => [[video](http://asciinema.org/a/12020)] @@ -28,95 +28,96 @@ Use `rz-run` (`libpath=$PWD:/tmp/lib`, `arg2=hello`, `setenv=FOO=BAR` ...) see ` `[0x80480423]> . scriptfile` - interpret this file => [[video](http://asciinema.org/a/12017)] -`[0x80480423]> #!c` - enter C repl (see `#!` to list all available RLang plugins) => [[video](http://asciinema.org/a/12019)], everything have to be done in a oneliner or a .c file must be passed as an argument. +`[0x80480423]> #!c` - enter C repl (see `#!` to list all available RLang plugins) => [[video](http://asciinema.org/a/12019)], +everything has to be done in a one-liner or a .c file must be passed as an argument. To get `#!python` and much more, just build [rizin-bindings](https://github.com/rizinorg/rizin-bindings) -## How to list Source code as in gdb list ? +## How to list Source code as in gdb list?
`CL @ sym.main` - though the feature is highly experimental # shortcuts -| Command | IDA Pro | rizin | rizin (visual mode) | GDB | WinDbg | -| ------------- | ------------- |----------------|------------------|-----|-----| -|**Analysis** | | | -|Analysis of everything |`Automatically launched when opening a binary` |`aaa or -A (aaaa or -AA for even experimental analysis)` | `N/A` |N/A |N/A -|**Navigation** | | | -|xref to |`x` |`axt` | `x` |N/A |N/A -|xref from |`ctrl + j` |`axf` | `X` |N/A |N/A -|xref to graph |? |`agt [offset]` | ? |N/A |N/A -|xref from graph|? |`agf [offset]` | ? |N/A |N/A -|list functions |`alt + 1` |`afl;is` | `t` |N/A |N/A -|listing |`alt + 2` |`pdf` | `p` |N/A |N/A -|hex mode |`alt + 3` |`pxa` | `P` |N/A |N/A -|imports |`alt + 6` |`ii` | `:ii` |N/A |N/A -|exports |`alt + 7` |`is~FUNC` | ? |N/A |N/A -|follow jmp/call|`enter` |`s offset` |`enter` or `0`-`9`|N/A |N/A -|undo seek |`esc` |`shu` | `u` |N/A |N/A -|redo seek |`ctrl+enter` |`shr` | `U` |N/A |N/A -|show graph |`space` |`agv` | `V` |N/A |N/A -|**Edit** | | | -|rename |`n` |`afn`| `dr` |N/A |N/A -|graph view |`space` |`agv` | `V` |N/A |N/A -|define as data |`d` |`Cd [size]` | `dd`,`db`,`dw`,`dW`|N/A |N/A -|define as code |`c` |`C- [size]` | `d-` or `du` |N/A |N/A -|define as undefined|`u` |`C- [size]` | `d-` or `du` |N/A |N/A -|define as string|`A` |`Cs [size]` | `ds` |N/A |N/A -|define as struct|`Alt+Q` |`Cf [size]` | `dF` |N/A |N/A -|**Debugger** | | | | -|Start Process/ Continue execution|`F9` |`dc` | `F9` | `r` and `c` | `g` -|Terminate Process|`Ctrl+F2` |`dk 9` | ? | `kill` | `q` -|Detach |`?` | `o-` | ? | `detach` | -|step into |`F7` |`ds` | `s` | `n` | `t` -|step into 4 instructions | ? | `ds 4` | F7 | `n 4` | `t 4` -|step over |`F8` |`dso` | `S` | `s` | `p` -|step until a specific address|?|`dsu ` | ? | `s` | `g ` -|Run until return|`Ctrl+F7` |`dcr` | ? 
| `finish` | `gu` -|Run until cursor|`F4` |[#249](https://github.com/rizinorg/rizin/issues/249)| [#249](https://github.com/rizinorg/rizin/issues/249) | N/A | N/A -|Show Backtrace |`?` |`dbt` | ? | `bt` | -|display Register|On register Windows|`dr all` | Shown in Visual mode | `info registers` | `r` -|display eax |On register Windows |`dr?eax` | Shown in Visual mode | `info registers eax` | `r rax` -|display old state of all registers |? |`dro` | ? | ? | ? -|display function addr + N |? |`afi $$` - display function information of current offset (`$$`) | ? | ? | ? -|display frame state |? |`pxw rbp-rsp@rsp`| ? | `i f` | ? -|How to step until condition is true |? |`dsi` | ? | ? | ? -|Update a register value |? |`dr rip=0x456` | ? | `set $rip=0x456` | `r rip=456` -|**Disassembly** | | | | -|disassembly forward | N/A | `pd` | `Vp` | `disas` | `uf`, `u` -|disassembly N instructions | N/A | `pd X` | Vp | x/i | `u LX` -|disassembly N (backward) | N/A | `pd -X` | `Vp` | `disas ` | `ub` -|**Information on the bin** | | | | -|Sections/regions | `Menu sections` | `iS` or `S` (append j for json) | N/A | maint info sections | !address -|**Load symbol file** | | | | -|Sections/regions | `pdb menu` | `asm.dwarf.file`, `pdb.XX`) | N/A | add-symbol-file | r -|**BackTrace** | | | | -|Stack Trace | N/A | `dbt` | N/A | `bt` | `k` -|Stack Trace in Json | N/A | `dbtj` | N/A | | -|Partial Backtrace (innermost) | N/A | `dbt` (`dbg.btdepth` `dbg.btalgo`) | N/A | bt | k -|Partial Backtrace (outermost) | N/A | `dbt` (`dbg.btdepth` `dbg.btalgo`) | N/A | bt - | -|Stacktrace for all threads | N/A | `dbt@t` | N/A | `thread apply all bt` | `~* k` -|**Breakpoints** | | | | | -|Breakpoint list |`Ctrl+Alt+B` |`db` | ? 
| `info breakpoints` | `bl` -|add breakpoint |`F2` |`db [offset]` |`F2` | `break`| `bp` -|**Threads** | | | | | -|Switch to thread |`Thread menu` |`dp` | N/A | `thread `| `~s` -|**Frames** | | | | | -|Frame Numbers |`N/A` |`?` | N/A | `any bt command`| `kn` -|Select Frame |`N/A` |`?` | N/A | `frame`| `.frame` -|**Parameters/Locals** | | | | | -|Display parameters |`N/A` |`afv` | N/A | `info args`| `dv /t /i /V` -|Display parameters |`N/A` |`afv` | N/A | `info locals`| `dv /t /i /V` -|Display parameters/locals in json |`N/A` |`afvj` | N/A | `info locals`| `dv /t /i /V` -|list addresses where vars are accessed(R/W) |`N/A` |`afvR/afvW` | N/A | `?`| `?` -|**Project Related** | | | | -|open project | |`Po [file]` | | ? -|save project | automatic |`Ps [file]` | | ? -|show project information | |`Pi [file]` | | ? -|**Miscellaneous** | | | | -|Dump byte char array | `N/A` |`pc?` (json, C, char, etc.) | Vpppp | x/bc | db -|options |option menu |`e?` | `e` | -|search |search menu |`/?` | Select the zone with the cursor `c` then `/` | | `s` +| Command | IDA Pro | rizin | rizin (visual mode) | GDB | WinDbg | +|---------------------------------------------|------------------------------------------------|------------------------------------------------------------------|------------------------------------------------------|-----------------------|---------------| +| **Analysis** | | | | | | +| Analysis of everything | `Automatically launched when opening a binary` | `aaa or -A (aaaa or -AA for even experimental analysis)` | `N/A` | N/A | N/A | +| **Navigation** | | | | | | +| xref to | `x` | `axt` | `x` | N/A | N/A | +| xref from | `ctrl + j` | `axf` | `X` | N/A | N/A | +| xref to graph | ? | `agt [offset]` | ? | N/A | N/A | +| xref from graph | ? | `agf [offset]` | ? 
| N/A | N/A | +| list functions | `alt + 1` | `afl;is` | `t` | N/A | N/A | +| listing | `alt + 2` | `pdf` | `p` | N/A | N/A | +| hex mode | `alt + 3` | `pxa` | `P` | N/A | N/A | +| imports | `alt + 6` | `ii` | `:ii` | N/A | N/A | +| exports | `alt + 7` | `is~FUNC` | ? | N/A | N/A | +| follow jmp/call | `enter` | `s offset` | `enter` or `0`-`9` | N/A | N/A | +| undo seek | `esc` | `shu` | `u` | N/A | N/A | +| redo seek | `ctrl+enter` | `shr` | `U` | N/A | N/A | +| show graph | `space` | `agv` | `V` | N/A | N/A | +| **Edit** | | | | | | +| rename | `n` | `afn` | `dr` | N/A | N/A | +| graph view | `space` | `agv` | `V` | N/A | N/A | +| define as data | `d` | `Cd [size]` | `dd`,`db`,`dw`,`dW` | N/A | N/A | +| define as code | `c` | `C- [size]` | `d-` or `du` | N/A | N/A | +| define as undefined | `u` | `C- [size]` | `d-` or `du` | N/A | N/A | +| define as string | `A` | `Cs [size]` | `ds` | N/A | N/A | +| define as struct | `Alt+Q` | `Cf [size]` | `dF` | N/A | N/A | +| **Debugger** | | | | | | +| Start Process/ Continue execution | `F9` | `dc` | `F9` | `r` and `c` | `g` | +| Terminate Process | `Ctrl+F2` | `dk 9` | ? | `kill` | `q` | +| Detach | `?` | `o-` | ? | `detach` | | +| step into | `F7` | `ds` | `s` | `n` | `t` | +| step into 4 instructions | ? | `ds 4` | F7 | `n 4` | `t 4` | +| step over | `F8` | `dso` | `S` | `s` | `p` | +| step until a specific address | ? | `dsu ` | ? | `s` | `g ` | +| Run until return | `Ctrl+F7` | `dcr` | ? | `finish` | `gu` | +| Run until cursor | `F4` | [#249](https://github.com/rizinorg/rizin/issues/249) | [#249](https://github.com/rizinorg/rizin/issues/249) | N/A | N/A | +| Show Backtrace | `?` | `dbt` | ? | `bt` | | +| display Register | On register Windows | `dr all` | Shown in Visual mode | `info registers` | `r` | +| display eax | On register Windows | `dr?eax` | Shown in Visual mode | `info registers eax` | `r rax` | +| display old state of all registers | ? | `dro` | ? | ? | ? | +| display function addr + N | ? 
| `afi $$` - display function information of current offset (`$$`) | ? | ? | ? | +| display frame state | ? | `pxw rbp-rsp@rsp` | ? | `i f` | ? | +| How to step until condition is true | ? | `dsi` | ? | ? | ? | +| Update a register value | ? | `dr rip=0x456` | ? | `set $rip=0x456` | `r rip=456` | +| **Disassembly** | | | | | | +| disassembly forward | N/A | `pd` | `Vp` | `disas` | `uf`, `u` | +| disassembly N instructions | N/A | `pd X` | Vp | x/i | `u LX` | +| disassembly N (backward) | N/A | `pd -X` | `Vp` | `disas ` | `ub` | +| **Information on the bin** | | | | | | +| Sections/regions | `Menu sections` | `iS` or `S` (append j for json) | N/A | maint info sections | !address | +| **Load symbol file** | | | | | | +| Sections/regions | `pdb menu` | `asm.dwarf.file`, `pdb.XX`) | N/A | add-symbol-file | r | +| **BackTrace** | | | | | | +| Stack Trace | N/A | `dbt` | N/A | `bt` | `k` | +| Stack Trace in Json | N/A | `dbtj` | N/A | | | +| Partial Backtrace (innermost) | N/A | `dbt` (`dbg.btdepth` `dbg.btalgo`) | N/A | bt | k | +| Partial Backtrace (outermost) | N/A | `dbt` (`dbg.btdepth` `dbg.btalgo`) | N/A | bt - | | +| Stacktrace for all threads | N/A | `dbt@t` | N/A | `thread apply all bt` | `~* k` | +| **Breakpoints** | | | | | | +| Breakpoint list | `Ctrl+Alt+B` | `db` | ? 
| `info breakpoints` | `bl` | +| add breakpoint | `F2` | `db [offset]` | `F2` | `break` | `bp` | +| **Threads** | | | | | | +| Switch to thread | `Thread menu` | `dp` | N/A | `thread ` | `~s` | +| **Frames** | | | | | | +| Frame Numbers | `N/A` | `?` | N/A | `any bt command` | `kn` | +| Select Frame | `N/A` | `?` | N/A | `frame` | `.frame` | +| **Parameters/Locals** | | | | | | +| Display parameters | `N/A` | `afv` | N/A | `info args` | `dv /t /i /V` | +| Display parameters | `N/A` | `afv` | N/A | `info locals` | `dv /t /i /V` | +| Display parameters/locals in json | `N/A` | `afvj` | N/A | `info locals` | `dv /t /i /V` | +| list addresses where vars are accessed(R/W) | `N/A` | `afvR/afvW` | N/A | `?` | `?` | +| **Project Related** | | | | | | +| open project | | `Po [file]` | | ? | | +| save project | automatic | `Ps [file]` | | ? | | +| show project information | | `Pi [file]` | | ? | | +| **Miscellaneous** | | | | | | +| Dump byte char array | `N/A` | `pc?` (json, C, char, etc.) | Vpppp | x/bc | db | +| options | option menu | `e?` | `e` | | | +| search | search menu | `/?` | Select the zone with the cursor `c` then `/` | | `s` | ## Equivalent of "set-follow-fork-mode" gdb command This can be done in two ways: diff --git a/src/debugger/registers.md b/src/debugger/registers.md index 7cdadcac..6a5d77cc 100644 --- a/src/debugger/registers.md +++ b/src/debugger/registers.md @@ -1,134 +1,167 @@ # Registers -The registers are part of a user area stored in the context structure used by the scheduler. This structure can be manipulated to get and set the values of those registers, and, for example, on Intel hosts, it is possible to directly manipulate DR0-DR7 hardware registers to set hardware breakpoints. +The registers are part of a user area stored in the context structure used by the scheduler. 
This structure can be +manipulated to get and set the values of those registers, and, for example, on Intel hosts, it is possible to +directly manipulate DR0-DR7 hardware registers to set hardware breakpoints. There are different commands to get values of registers. For the General Purpose ones use: ``` -[0x4A13B8C0]> dr -r15 = 0x00000000 -r14 = 0x00000000 -r13 = 0x00000000 -r12 = 0x00000000 -rbp = 0x00000000 -rbx = 0x00000000 -r11 = 0x00000000 -r10 = 0x00000000 -r9 = 0x00000000 -r8 = 0x00000000 -rax = 0x00000000 -rcx = 0x00000000 -rdx = 0x00000000 -rsi = 0x00000000 -rdi = 0x00000000 -oeax = 0x0000003b -rip = 0x7f20bf5df630 -rsp = 0x7fff515923c0 - -[0x7f0f2dbae630]> dr rip ; get value of 'rip' -0x7f0f2dbae630 +[0x55ca0f427100]> dr +rax = 0x0000000000000038 +rbx = 0x0000000000000000 +rcx = 0x00007fff03855298 +rdx = 0x00007f52c4ae4080 +r8 = 0x0000000000000000 +r9 = 0x00000000000007f8 +r10 = 0x00007fff03855190 +r11 = 0x0000000000000206 +r12 = 0x000055ca0f427100 +r13 = 0x00007fff03855280 +r14 = 0x0000000000000000 +r15 = 0x0000000000000000 +rsi = 0x00007f52c4b148b8 +rdi = 0x00007f52c4b142e0 +rsp = 0x00007fff03855280 +rbp = 0x0000000000000000 +rip = 0x000055ca0f427100 +cs = 0x0000000000000033 +rflags = 0x0000000000000206 +orax = 0xffffffffffffffff +ss = 0x000000000000002b +fs = 0x00007f52c48c5740 +gs = 0x0000000000000000 +ds = 0x0000000000000000 +es = 0x0000000000000000 +fs_base = 0x0000000000000000 +gs_base = 0x0000000000000000 + +[0x55ca0f427100]> dr rip ; get value of 'rip' +rip = 0x000055ca0f427100 [0x4A13B8C0]> dr rip = esp ; set 'rip' as esp ``` -Interaction between a plugin and the core is done by commands returning rizin instructions. This is used, for example, to set flags in the core to set values of registers. +Interaction between a plugin and the core is done by commands returning Rizin instructions. This is used, for example, +to set flags in the core to set values of registers. 
``` -[0x7f0f2dbae630]> dr* ; Appending '*' will show rizin commands -f r15 1 0x0 -f r14 1 0x0 -f r13 1 0x0 -f r12 1 0x0 -f rbp 1 0x0 -f rbx 1 0x0 -f r11 1 0x0 -f r10 1 0x0 -f r9 1 0x0 -f r8 1 0x0 -f rax 1 0x0 -f rcx 1 0x0 -f rdx 1 0x0 -f rsi 1 0x0 -f rdi 1 0x0 -f oeax 1 0x3b -f rip 1 0x7fff73557940 -f rflags 1 0x200 -f rsp 1 0x7fff73557940 - -[0x4A13B8C0]> .dr* ; include common register values in flags +[0x55ca0f427100]> dr* ; Appending '*' will show rizin commands +ar rax = 0x0000000000000038 +ar rbx = 0x0000000000000000 +ar rcx = 0x00007fff03855298 +ar rdx = 0x00007f52c4ae4080 +ar r8 = 0x0000000000000000 +ar r9 = 0x00000000000007f8 +ar r10 = 0x00007fff03855190 +ar r11 = 0x0000000000000206 +ar r12 = 0x000055ca0f427100 +ar r13 = 0x00007fff03855280 +ar r14 = 0x0000000000000000 +ar r15 = 0x0000000000000000 +ar rsi = 0x00007f52c4b148b8 +ar rdi = 0x00007f52c4b142e0 +ar rsp = 0x00007fff03855280 +ar rbp = 0x0000000000000000 +ar rip = 0x000055ca0f427100 +ar cs = 0x0000000000000033 +ar rflags = 0x0000000000000206 +ar orax = 0xffffffffffffffff +ar ss = 0x000000000000002b +ar fs = 0x00007f52c48c5740 +ar gs = 0x0000000000000000 +ar ds = 0x0000000000000000 +ar es = 0x0000000000000000 +ar fs_base = 0x0000000000000000 +ar gs_base = 0x0000000000000000 ``` -An old copy of registers is stored all the time to keep track of the changes done during execution of a program being analyzed. This old copy can be accessed with `oregs`. +An old copy of registers is stored all the time to keep track of the changes done during execution +of a program being analyzed. This old copy can be accessed with `dro`.
``` -[0x7f1fab84c630]> dro -r15 = 0x00000000 -r14 = 0x00000000 -r13 = 0x00000000 -r12 = 0x00000000 -rbp = 0x00000000 -rbx = 0x00000000 -r11 = 0x00000000 -r10 = 0x00000000 -r9 = 0x00000000 -r8 = 0x00000000 -rax = 0x00000000 -rcx = 0x00000000 -rdx = 0x00000000 -rsi = 0x00000000 -rdi = 0x00000000 -oeax = 0x0000003b -rip = 0x7f1fab84c630 -rflags = 0x00000200 -rsp = 0x7fff386b5080 +[0x55ca0f427100]> dro +rax = 0x0000000000000038 +rbx = 0x0000000000000000 +rcx = 0x00007fff03855298 +rdx = 0x00007f52c4ae4080 +r8 = 0x0000000000000000 +r9 = 0x00000000000007f8 +r10 = 0x00007fff03855190 +r11 = 0x0000000000000206 +r12 = 0x000055ca0f427100 +r13 = 0x00007fff03855280 +r14 = 0x0000000000000000 +r15 = 0x0000000000000000 +rsi = 0x00007f52c4b148b8 +rdi = 0x00007f52c4b142e0 +rsp = 0x00007fff03855280 +rbp = 0x0000000000000000 +rip = 0x000055ca0f427101 +cs = 0x0000000000000033 +rflags = 0x0000000000000206 +orax = 0xffffffffffffffff +ss = 0x000000000000002b +fs = 0x00007f52c48c5740 +gs = 0x0000000000000000 +ds = 0x0000000000000000 +es = 0x0000000000000000 +fs_base = 0x0000000000000000 +gs_base = 0x0000000000000000 ``` Current state of registers ``` -[0x7f1fab84c630]> dr -r15 = 0x00000000 -r14 = 0x00000000 -r13 = 0x00000000 -r12 = 0x00000000 -rbp = 0x00000000 -rbx = 0x00000000 -r11 = 0x00000000 -r10 = 0x00000000 -r9 = 0x00000000 -r8 = 0x00000000 -rax = 0x00000000 -rcx = 0x00000000 -rdx = 0x00000000 -rsi = 0x00000000 -rdi = 0x7fff386b5080 -oeax = 0xffffffffffffffff -rip = 0x7f1fab84c633 -rflags = 0x00000202 -rsp = 0x7fff386b5080 +[0x55ca0f427100]> dr +rax = 0x0000000000000038 +rbx = 0x0000000000000000 +rcx = 0x00007fff03855298 +rdx = 0x00007f52c4ae4080 +r8 = 0x0000000000000000 +r9 = 0x00000000000007f8 +r10 = 0x00007fff03855190 +r11 = 0x0000000000000206 +r12 = 0x000055ca0f427100 +r13 = 0x00007fff03855280 +r14 = 0x0000000000000000 +r15 = 0x0000000000000000 +rsi = 0x00007f52c4b148b8 +rdi = 0x00007f52c4b142e0 +rsp = 0x00007fff03855280 +rbp = 0x0000000000000000 +rip = 0x000055ca0f427100 +cs = 
0x0000000000000033 +rflags = 0x0000000000000206 +orax = 0xffffffffffffffff +ss = 0x000000000000002b +fs = 0x00007f52c48c5740 +gs = 0x0000000000000000 +ds = 0x0000000000000000 +es = 0x0000000000000000 +fs_base = 0x0000000000000000 +gs_base = 0x0000000000000000 ``` -Values stored in eax, oeax and eip have changed. +The value stored in rip has changed. -To store and restore register values you can just dump the output of 'dr*' command to disk and then re-interpret it again: +To store and restore register values you can just dump the output of 'dr*' command to disk and then +re-interpret it again: ``` -[0x4A13B8C0]> dr* > regs.saved ; save registers -[0x4A13B8C0]> drp regs.saved ; restore +[0x55ca0f427100]> dr* > regs.saved ; save registers +[0x55ca0f427100]> drp regs.saved ; restore ``` EFLAGS can be similarly altered. E.g., setting selected flags: ``` -[0x4A13B8C0]> dr eflags = pst -[0x4A13B8C0]> dr eflags = azsti +[0x55ca0f427100]> dr eflags = pst +[0x55ca0f427100]> dr eflags = azsti ``` You can get a string which represents latest changes of registers using `drd` command (diff registers): ``` [0x4A13B8C0]> drd -oeax = 0x0000003b was 0x00000000 delta 59 -rip = 0x7f00e71282d0 was 0x00000000 delta -418217264 -rflags = 0x00000200 was 0x00000000 delta 512 -rsp = 0x7fffe85a09c0 was 0x00000000 delta -396752448 +rip = 0x55ca0f427100 was 0x55ca0f427101 delta 0xffffffffffffffff ``` diff --git a/src/debugger/revdebug.md b/src/debugger/revdebug.md index 4796c21d..f0bb8e9a 100644 --- a/src/debugger/revdebug.md +++ b/src/debugger/revdebug.md @@ -1,7 +1,6 @@ # Reverse Debugging -Rizin has reverse debugger, that can seek the program counter backward. -(e.g. reverse-next, reverse-continue in gdb) +Rizin has a reverse debugger that can seek the program counter backward (e.g. reverse-next, reverse-continue in gdb). Firstly you need to save program state at the point that you want to start recording.
The syntax for recording is: @@ -27,7 +26,7 @@ hit breakpoint at: 4028a2 When you run `dsb`, reverse debugger restore previous recorded state and execute program from it until desired point. -Or you can also try continue back: +Or you can also try to continue back: ``` [0x004028a0]> db @ 0x004028a2 @@ -54,21 +53,11 @@ NOTE: Program records can be saved at any moments. These are diff style format that save only different memory area from previous. It saves memory space rather than entire dump. -And also can add comment: - -``` -[0x004028c2]> dtsC 0 program start -[0x004028c2]> dtsC 1 decryption start -[0x004028c2]> dts -session: 0 at:0x004028a0 "program start" -session: 1 at:0x004028c2 "decryption start" -``` - You can leave notes for each records to keep in your mind. `dsb` and `dcb` commands restore the program state from latest record if there are many records. -Program records can exported to file and of course import it. +Program records can be exported to a file and, of course, imported from it. Export/Import records to/from file: ``` @@ -99,7 +88,7 @@ And step back by `aesb`: 0x00404879 ``` -In addition to the native reverse debugging capabilities in rizin, it's also possible to +In addition to the native reverse debugging capabilities in Rizin, it's also possible to use gdb's remote protocol to reverse debug a target gdbserver that supports it. `R!dsb` and `R!dcb` are available as `dsb` and `dcb` replacements for this purpose, see [remote gdb's documentation](../remote_access/remote_gdb.md) for more information. diff --git a/src/disassembling/adding_metadata.md b/src/disassembling/adding_metadata.md index 6ac4248a..5c77c156 100644 --- a/src/disassembling/adding_metadata.md +++ b/src/disassembling/adding_metadata.md @@ -3,58 +3,59 @@ The typical work involved in reversing binary files makes powerful annotation capabilities essential. Rizin offers multiple ways to store and retrieve such metadata.
-By following common basic UNIX principles, it is easy to write a small utility in a scripting language which uses `objdump`, `otool` or any other existing utility to obtain information from a binary and to import it into rizin. For example, take a look at [python-idb](https://github.com/williballenthin/python-idb)-based [`rz-ida.py`](https://github.com/rizinorg/rizin-extras/blob/master/rz-ida/rz-ida.py) which opens IDB files directly without IDA Pro installed. You can load the resulting file with the `.` (dot) command into the rizin: +By following common basic UNIX principles, it is easy to write a small utility in a scripting language which uses +`objdump`, `otool` or any other existing utility to obtain information from a binary and to import it into Rizin. +For example, take a look at [python-idb](https://github.com/williballenthin/python-idb)-based +[`rz-ida.py`](https://github.com/rizinorg/rizin-extras/blob/master/rz-ida/rz-ida.py) which opens IDB files directly +without IDA Pro installed. You can load the resulting file with the `.` (dot) command into the Rizin: + ``` [0x00000000]> . file.rz ``` -The `C` command is used to manage comments and data conversions. You can define a range of program's bytes to be interpreted as either code, binary data or string. It is also possible to execute external code at every specified flag location in order to fetch some metadata, such as a comment, from an external file or database. +The `C` command is used to manage comments and data conversions. You can define a range of program's bytes to be +interpreted as either code, binary data or string. It is also possible to execute external code at every specified flag +location in order to fetch some metadata, such as a comment, from an external file or database. There are many different metadata manipulation commands, here is the glimpse of all of them: ``` [0x00404cc0]> C? -| Usage: C[-LCvsdfm*?][*?] [...] 
# Metadata management -| C list meta info in human friendly form -| C* list meta info in rizin commands -| C*. list meta info of current offset in rizin commands -| C- [len] [[@]addr] delete metadata at given address range -| C. list meta info of current offset in human friendly form -| CC! [@addr] edit comment with $EDITOR -| CC[?] [-] [comment-text] [@addr] add/remove comment -| CC.[addr] show comment in current address -| CCa[-at]|[at] [text] [@addr] add/remove comment at given address -| CCu [comment-text] [@addr] add unique comment -| CF[sz] [fcn-sign..] [@addr] function signature -| CL[-][*] [file:line] [addr] show or add 'code line' information (bininfo) -| CS[-][space] manage meta-spaces to filter comments, etc.. -| C[Cthsdmf] list comments/types/hidden/strings/data/magic/formatted in human friendly form -| C[Cthsdmf]* list comments/types/hidden/strings/data/magic/formatted in rizin commands -| Cd[-] [size] [repeat] [@addr] hexdump data array (Cd 4 10 == dword [10]) -| Cd. [@addr] show size of data at current address -| Cf[?][-] [sz] [0|cnt][fmt] [a0 a1...] [@addr] format memory (see pf?) -| Ch[-] [size] [@addr] hide data -| Cm[-] [sz] [fmt..] [@addr] magic parse (see pm?) -| Cs[?] [-] [size] [@addr] add string -| Ct[?] [-] [comment-text] [@addr] add/remove type analysis comment -| Ct.[@addr] show comment at current or specified address -| Cv[bsr][?] add comments to args -| Cz[@addr] add string (see Cs?) +Usage: C[?] # Code metadata (comments, format, hints, ..) +| C[j*l] # List all meta information +| C.[j*l] # Show all meta information at current address +| C- # Remove meta information at current address +| C-* # Remove all meta information +| CC[?] # Manipulate the comments +| CS[l-r?] # Manage metainformation spaces +| Cf[l-?] # Manage the format string metainformation +| Cd[l.-?] # Manage the raw data metainformation +| Ch[l-?] # Manage the "hidden" mark metainformation +| Cm[l-?] # Manage the "magic" mark metainformation +| Cs[?] 
# Manipulate string metainformation +| Ct[l-.?] # Manage the type metainformation +| Cv[-elrs] # Add comments to the vars or arguments ``` Simply to add the comment to a particular line/address you can use `Ca` command: ``` -[0x00000000]> CCa 0x0000002 this guy seems legit +[0x00000000]> CC 0x0000002 this guy seems legit [0x00000000]> pd 2 0x00000000 0000 add [rax], al ; this guy seems legit 0x00000002 0000 add [rax], al ``` -The `C?` family of commands lets you mark a range as one of several kinds of types. Three basic types are: code (disassembly is done using asm.arch), data (an array of data elements) or string. Use the `Cs` command to define a string, use the `Cd` command for defining an array of data elements, and use the `Cf` command to define more complex data structures like structs. +The `C?` family of commands lets you mark a range as one of several kinds of types. Three basic types are: +code (disassembly is done using asm.arch), data (an array of data elements) or string. Use the `Cs` command to define +a string, use the `Cd` command for defining an array of data elements, and use the `Cf` command to define more complex +data structures like structs. -Annotating data types is most easily done in visual mode, using the "d" key, short for "data type change". First, use the cursor to select a range of bytes (press `c` key to toggle cursor mode and use HJKL keys to expand selection), then press 'd' to get a menu of possible actions/types. For example, to mark the range as a string, use the 's' option from the menu. You can achieve the same result from the shell using the `Cs` command: +Annotating data types is most easily done in visual mode, using the "d" key, short for "data type change". First, +use the cursor to select a range of bytes (press `c` key to toggle cursor mode and use HJKL keys to expand selection), +then press 'd' to get a menu of possible actions/types. For example, to mark the range as a string, use the 's' option +from the menu. 
You can achieve the same result from the shell using the `Cs` command: ``` [0x00000000]> f string_foo @ 0x800 @@ -81,14 +82,20 @@ The `Cf` command is used to define a memory format string (the same syntax used 0x7fd9f13ae638 4989c4 mov r12, rax ``` -The `[sz]` argument to `Cf` is used to define how many bytes the struct should take up in the disassembly, and is completely independent from the size of the data structure defined by the format string. This may seem confusing, but has several uses. For example, you may want to see the formatted structure displayed in the disassembly, but still have those locations be visible as offsets and with raw bytes. Sometimes, you find large structures, but only identified a few fields, or only interested in specific fields. Then, you can tell rizin to display only those fields, using the format string and using 'skip' fields, and also have the disassembly continue after the entire structure, by giving it full size using the `sz` argument. +The `[sz]` argument to `Cf` is used to define how many bytes the struct should take up in the disassembly, +and is completely independent of the size of the data structure defined by the format string. This may seem confusing, +but has several uses. For example, you may want to see the formatted structure displayed in the disassembly, +but still have those locations be visible as offsets and with raw bytes. Sometimes, you find large structures, +but only identified a few fields, or only interested in specific fields. Then, you can tell Rizin to display only +those fields, using the format string and using 'skip' fields, and also have the disassembly continue after +the entire structure, by giving it full size using the `sz` argument. Using `Cf`, it's easy to define complex structures with simple one-liners. See `pf?` for more information. Remember that all these `C` commands can also be accessed from the visual mode by pressing the `d` (data conversion) key. 
Note that unlike [`t`](../analysis/types.md) commands `Cf` doesn't change analysis results. It is only a visual boon. -Sometimes just adding a single line of comments is not enough, in this case rizin allows you to +Sometimes just adding a single line of comments is not enough, in this case Rizin allows you to create a link for a particular text file. You can use it with `CC,` command or by pressing `,` key in the visual mode. This will open an `$EDITOR` to create a new file, or if filename does exist, just will create a link. It will be shown in the disassembly comments: @@ -104,4 +111,3 @@ will create a link. It will be shown in the disassembly comments: Note `,(locale-help.txt)` appeared in the comments, if we press `,` again in the visual mode, it will open the file. Using this mechanism we can create a long descriptions of some particular places in disassembly, link datasheets or related articles. - diff --git a/src/disassembling/esil.md b/src/disassembling/esil.md index 62207d39..3040d731 100644 --- a/src/disassembling/esil.md +++ b/src/disassembling/esil.md @@ -1,6 +1,10 @@ # ESIL -ESIL stands for 'Evaluable Strings Intermediate Language'. It aims to describe a [Forth](https://en.wikipedia.org/wiki/Forth_%28programming_language%29)-like representation for every target CPU opcode semantics. ESIL representations can be evaluated (interpreted) in order to emulate individual instructions. Each command of an ESIL expression is separated by a comma. Its virtual machine can be described as this: +ESIL stands for "Evaluable Strings Intermediate Language". It aims to describe +a [Forth](https://en.wikipedia.org/wiki/Forth_%28programming_language%29)-like representation for every target +CPU opcode semantics. ESIL representations can be evaluated (interpreted) in order to emulate individual instructions. +Each command of an ESIL expression is separated by a comma. 
Its virtual machine can be described as this: + ``` while ((word=haveCommand())) { if (word.isOperator()) { @@ -11,34 +15,55 @@ ESIL stands for 'Evaluable Strings Intermediate Language'. It aims to describe a nextCommand(); } ``` -As we can see ESIL uses a stack-based interpreter similar to what is commonly used for calculators. You have two categories of inputs: values and operators. A value simply gets pushed on the stack, an operator then pops values (its arguments if you will) off the stack, performs its operation and pushes its results (if any) back on. We can think of ESIL as a post-fix notation of the operations we want to do. + +As we can see ESIL uses a stack-based interpreter similar to what is commonly used for calculators. +You have two categories of inputs: values and operators. A value simply gets pushed on the stack, +an operator then pops values (its arguments if you will) off the stack, performs its operation and pushes its results +(if any) back on. We can think of ESIL as a post-fix notation of the operations we want to do. So let's see an example: + ``` 4,esp,-=,ebp,esp,=[4] ``` -Can you guess what this is? If we take this post-fix notation and transform it back to in-fix we get + +Can you guess what this is? If we take this post-fix notation and transform it back to in-fix we get: + ``` esp -= 4 4bytes(dword) [esp] = ebp ``` + We can see that this corresponds to the x86 instruction `push ebp`! Isn't that cool? -The aim is to be able to express most of the common operations performed by CPUs, like binary arithmetic operations, memory loads and stores, processing syscalls. This way if we can transform the instructions to ESIL we can see what a program does while it is running even for the most cryptic architectures you definitely don't have a device to debug on for. +The aim is to be able to express most of the common operations performed by CPUs, like binary arithmetic operations, +memory loads and stores, processing syscalls. 
This way if we can transform the instructions to ESIL we can see +what a program does while it is running even for the most cryptic architectures you definitely don't have a device +to debug on for. ## Using ESIL -rizin's visual mode is great to inspect the ESIL evaluations. +Rizin's visual mode is great to inspect the ESIL evaluations. There are 3 environment variables that are important for watching what a program does: + ``` [0x00000000]> e emu.str=true ``` -`asm.emu` tells rizin if you want ESIL information to be displayed. If it is set to true, you will see comments appear to the right of your disassembly that tell you how the contents of registers and memory addresses are changed by the current instruction. For example, if you have an instruction that subtracts a value from a register it tells you what the value was before and what it becomes after. This is super useful so you don't have to sit there yourself and track which value goes where. +`asm.emu` tells Rizin if you want ESIL information to be displayed. If it is set to true, you will see comments appear +to the right of your disassembly that tell you how the contents of registers and memory addresses are changed +by the current instruction. For example, if you have an instruction that subtracts a value from a register it tells you +what the value was before and what it becomes after. This is super useful so you don't have to sit there yourself and +track which value goes where. -One problem with this is that it is a lot of information to take in at once and sometimes you simply don't need it. rizin has a nice compromise for this. That is what the `emu.str` variable is for (`asm.emustr` on <= 2.2). Instead of this super verbose output with every register value, this only adds really useful information to the output, e.g., strings that are found at addresses a program uses or whether a jump is likely to be taken or not. 
+One problem with this is that it is a lot of information to take in at once, and sometimes you simply don't need it. +Rizin has a nice compromise for this. That is what the `emu.str` variable is for (`asm.emustr` on <= 2.2). +Instead of this super verbose output with every register value, this only adds really useful information to the output, +e.g., strings that are found at addresses a program uses or whether a jump is likely to be taken or not. -The third important variable is `asm.esil`. This switches your disassembly to no longer show you the actual disassembled instructions, but instead now shows you corresponding ESIL expressions that describe what the instruction does. +The third important variable is `asm.esil`. This switches your disassembly to no longer show you +the actual disassembled instructions, but instead now shows you corresponding ESIL expressions that describe +what the instruction does. So if you want to take a look at how instructions are expressed in ESIL simply set "asm.esil" to true. ``` @@ -91,65 +116,67 @@ ADDR BREAK Here is the complete instruction set used by the ESIL VM: -ESIL Opcode | Operands | Name | Operation| example ---- | --- | --- | --- | ---------------------------------------------- -TRAP | src | Trap | Trap signal | -**$** | src | Syscall | syscall | -**$$** | src | Instruction address | Get address of current instruction
stack=instruction address | -**==** | src,dst | Compare | stack = (dst == src) ;
update_eflags(dst - src) | -**<** | src,dst | Smaller (signed comparison) | stack = (dst < src) ;
update_eflags(dst - src) | [0x0000000]> "ae 1,5,<"
0x0
> "ae 5,5"
0x0" -**<=** | src,dst | Smaller or Equal (signed comparison) | stack = (dst <= src) ;
update_eflags(dst - src) | [0x0000000]> "ae 1,5,<"
0x0
> "ae 5,5"
0x1" -**>** | src,dst | Bigger (signed comparison) | stack = (dst > src) ;
update_eflags(dst - src) | > "ae 1,5,>"
0x1
> "ae 5,5,>"
0x0 - **>=** | src,dst | Bigger or Equal (signed comparison) | stack = (dst >= src) ;
update_eflags(dst - src) | > "ae 1,5,>="
0x1
> "ae 5,5,>="
0x1 - **<<** | src,dst | Shift Left | stack = dst << src | > "ae 1,1,<<"
0x2
> "ae 2,1,<<"
0x4 - **>>** | src,dst | Shift Right | stack = dst >> src | > "ae 1,4,>>"
0x2
> "ae 2,4,>>"
0x1 - **<<<** | src,dst | Rotate Left | stack=dst ROL src | > "ae 31,1,<<<"
0x80000000
> "ae 32,1,<<<"
0x1 -**>>>** | src,dst | Rotate Right | stack=dst ROR src | > "ae 1,1,>>>"
0x80000000
> "ae 32,1,>>>"
0x1 -**&** | src,dst | AND | stack = dst & src | > "ae 1,1,&"
0x1
> "ae 1,0,&"
0x0
> "ae 0,1,&"
0x0
> "ae 0,0,&"
0x0 -**|** | src,dst | OR | stack = dst | src | > "ae 1,1,|"
0x1
> "ae 1,0,|"
0x1
> "ae 0,1,|"
0x1
> "ae 0,0,|"
0x0 -**^** | src,dst | XOR | stack = dst ^src | > "ae 1,1,^"
0x0
> "ae 1,0,^"
0x1
> "ae 0,1,^"
0x1
> "ae 0,0,^"
0x0 -**+** | src,dst | ADD | stack = dst + src | > "ae 3,4,+"
0x7
> "ae 5,5,+"
0xa -**-** | src,dst | SUB | stack = dst - src | > "ae 3,4,-"
0x1
> "ae 5,5,-"
0x0
> "ae 4,3,-"
0xffffffffffffffff -**\*** | src,dst | MUL | stack = dst * src | > "ae 3,4,\*"
0xc
> "ae 5,5,\*"
0x19 -**/** | src,dst | DIV | stack = dst / src | > "ae 2,4,/"
0x2
> "ae 5,5,/"
0x1
> "ae 5,9,/"
0x1 -**%** | src,dst | MOD | stack = dst % src | > "ae 2,4,%"
0x0
> "ae 5,5,%"
0x0
> "ae 5,9,%"
0x4 -**~** | bits,src | SIGNEXT | stack = src sign extended | > "ae 8,0x80,~"
0xffffffffffffff80 -**~/** | src,dst | SIGNED DIV | stack = dst / src (signed) | > "ae 2,-4,~/"
0xfffffffffffffffe -**~%** | src,dst | SIGNED MOD | stack = dst % src (signed) | > "ae 2,-5,~%"
0xffffffffffffffff -**!** | src | NEG | stack = !!!src | > "ae 1,!"
0x0
> "ae 4,!"
0x0
> "ae 0,!"
0x1
-**++** | src | INC | stack = src++ | > ar r_00=0;ar r_00
0x00000000
> "ae r_00,++"
0x1
> ar r_00
0x00000000
> "ae 1,++"
0x2 -**--** | src | DEC | stack = src-- | > ar r_00=5;ar r_00
0x00000005
> "ae r_00,--"
0x4
> ar r_00
0x00000005
> "ae 5,--"
0x4 -**=** | src,reg | EQU | reg = src | > "ae 3,r_00,="
> aer r_00
0x00000003
> "ae r_00,r_01,="
> aer r_01
0x00000003 -**+=** | src,reg | ADD eq | reg = reg + src | > ar r_01=5;ar r_00=0;ar r_00
0x00000000
> "ae r_01,r_00,+="
> ar r_00
0x00000005
> "ae 5,r_00,+="
> ar r_00
0x0000000a -**-=** | src,reg | SUB eq | reg = reg - src | > "ae r_01,r_00,-="
> ar r_00
0x00000004
> "ae 3,r_00,-="
> ar r_00
0x00000001 -**\*=** | src,reg | MUL eq | reg = reg * src | > ar r_01=3;ar r_00=5;ar r_00
0x00000005
> "ae r_01,r_00,\*="
> ar r_00
0x0000000f
> "ae 2,r_00,\*="
> ar r_00
0x0000001e - **/=** | src,reg | DIV eq | reg = reg / src | > ar r_01=3;ar r_00=6;ar r_00
0x00000006
> "ae r_01,r_00,/="
> ar r_00
0x00000002
> "ae 1,r_00,/="
> ar r_00
0x00000002 - **%=** | src,reg | MOD eq | reg = reg % src | > ar r_01=3;ar r_00=7;ar r_00
0x00000007
> "ae r_01,r_00,%="
> ar r_00
0x00000001
> ar r_00=9;ar r_00
0x00000009
> "ae 5,r_00,%="
> ar r_00
0x00000004 -**<<=** | src,reg | Shift Left eq | reg = reg << src | > ar r_00=1;ar r_01=1;ar r_01
0x00000001
> "ae r_00,r_01,<<="
> ar r_01
0x00000002
> "ae 2,r_01,<<="
> ar r_01
0x00000008 -**>>=** | src,reg | Shift Right eq | reg = reg << src | > ar r_00=1;ar r_01=8;ar r_01
0x00000008
> "ae r_00,r_01,>>="
> ar r_01
0x00000004
> "ae 2,r_01,>>="
> ar r_01
0x00000001 -**&=** | src,reg | AND eq | reg = reg & src | > ar r_00=2;ar r_01=6;ar r_01
0x00000006
> "ae r_00,r_01,&="
> ar r_01
0x00000002
> "ae 2,r_01,&="
> ar r_01
0x00000002
> "ae 1,r_01,&="
> ar r_01
0x00000000 -**|=** | src,reg | OR eq| reg = reg | src | > ar r_00=2;ar r_01=1;ar r_01
0x00000001
> "ae r_00,r_01,|="
> ar r_01
0x00000003
> "ae 4,r_01,|="
> ar r_01
0x00000007 - **^=** | src,reg | XOR eq | reg = reg ^ src | > ar r_00=2;ar r_01=0xab;ar r_01
0x000000ab
> "ae r_00,r_01,^="
> ar r_01
0x000000a9
> "ae 2,r_01,^="
> ar r_01
0x000000ab -**++=** | reg | INC eq | reg = reg + 1 | > ar r_00=4;ar r_00
0x00000004
> "ae r_00,++="
> ar r_00
0x00000005 -**--=** | reg | DEC eq | reg = reg - 1 | > ar r_00=4;ar r_00
0x00000004
> "ae r_00,--="
> ar r_00
0x00000003 -**!=** | reg | NOT eq | reg = !reg | > ar r_00=4;ar r_00
0x00000004
> "ae r_00,!="
> ar r_00
0x00000000
> "ae r_00,!="
> ar r_00
0x00000001 ---- | --- | --- | --- | ---------------------------------------------- -=[]
=[\*]
=[1]
=[2]
=[4]
=[8] | src,dst | poke |\*dst=src |
> "ae 0xdeadbeef,0x10000,=[4],"

> pxw 4@0x10000
0x00010000 0xdeadbeef ....

> "ae 0x0,0x10000,=[4],"

> pxw 4@0x10000
0x00010000 0x00000000 -[]
[\*]
[1]
[2]
[4]
[8] | src | peek | stack=\*src |
> w test@0x10000

> "ae 0x10000,[4],"
0x74736574

> ar r_00=0x10000

> "ae r_00,[4],"
0x74736574 -|=[]
|=[1]
|=[2]
|=[4]
|=[8] | reg | nombre | code | >
> -SWAP | | Swap | Swap two top elements | SWAP -PICK | n | Pick | Pick nth element
from the top of the stack | 2,PICK -RPICK | m | Reverse Pick | Pick nth element
from the base of the stack | 0,RPICK -DUP | | Duplicate | Duplicate top element in stack | DUP -NUM | | Numeric | If top element is a reference
(register name, label, etc),
dereference it and push its real value | NUM -CLEAR | | Clear | Clear stack | CLEAR -BREAK | | Break | Stops ESIL emulation | BREAK -GOTO | n | Goto | Jumps to Nth ESIL word | GOTO 5 -TODO | | To Do | Stops execution
(reason: ESIL expression not completed) | TODO +| ESIL Opcode | Operands | Name | Operation | example | +|-------------------------------------------------------------------|----------|--------------------------------------|------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| TRAP | src | Trap | Trap signal | | +| **$** | src | Syscall | syscall | | +| **$$** | src | Instruction address | Get address of current instruction
stack=instruction address | | +| **==** | src,dst | Compare | stack = (dst == src) ;
update_eflags(dst - src) | | +| **<** | src,dst | Smaller (signed comparison) | stack = (dst < src) ;
update_eflags(dst - src) | [0x0000000]> "ae 1,5,<"
0x0
> "ae 5,5,<"
0x0 | +| **<=** | src,dst | Smaller or Equal (signed comparison) | stack = (dst <= src) ;
update_eflags(dst - src) | [0x0000000]> "ae 1,5,<="
0x0
> "ae 5,5,<="
0x1 | +| **>** | src,dst | Bigger (signed comparison) | stack = (dst > src) ;
update_eflags(dst - src) | > "ae 1,5,>"
0x1
> "ae 5,5,>"
0x0 | +| **>=** | src,dst | Bigger or Equal (signed comparison) | stack = (dst >= src) ;
update_eflags(dst - src) | > "ae 1,5,>="
0x1
> "ae 5,5,>="
0x1 | +| **<<** | src,dst | Shift Left | stack = dst << src | > "ae 1,1,<<"
0x2
> "ae 2,1,<<"
0x4 | +| **>>** | src,dst | Shift Right | stack = dst >> src | > "ae 1,4,>>"
0x2
> "ae 2,4,>>"
0x1 | +| **<<<** | src,dst | Rotate Left | stack=dst ROL src | > "ae 31,1,<<<"
0x80000000
> "ae 32,1,<<<"
0x1 | +| **>>>** | src,dst | Rotate Right | stack=dst ROR src | > "ae 1,1,>>>"
0x80000000
> "ae 32,1,>>>"
0x1 | +| **&** | src,dst | AND | stack = dst & src | > "ae 1,1,&"
0x1
> "ae 1,0,&"
0x0
> "ae 0,1,&"
0x0
> "ae 0,0,&"
0x0 | +| **|** | src,dst | OR | stack = dst | src | > "ae 1,1,|"
0x1
> "ae 1,0,|"
0x1
> "ae 0,1,|"
0x1
> "ae 0,0,|"
0x0 | +| **^** | src,dst | XOR | stack = dst ^src | > "ae 1,1,^"
0x0
> "ae 1,0,^"
0x1
> "ae 0,1,^"
0x1
> "ae 0,0,^"
0x0 | +| **+** | src,dst | ADD | stack = dst + src | > "ae 3,4,+"
0x7
> "ae 5,5,+"
0xa | +| **-** | src,dst | SUB | stack = dst - src | > "ae 3,4,-"
0x1
> "ae 5,5,-"
0x0
> "ae 4,3,-"
0xffffffffffffffff | +| **\*** | src,dst | MUL | stack = dst * src | > "ae 3,4,\*"
0xc
> "ae 5,5,\*"
0x19 | +| **/** | src,dst | DIV | stack = dst / src | > "ae 2,4,/"
0x2
> "ae 5,5,/"
0x1
> "ae 5,9,/"
0x1 | +| **%** | src,dst | MOD | stack = dst % src | > "ae 2,4,%"
0x0
> "ae 5,5,%"
0x0
> "ae 5,9,%"
0x4 | +| **~** | bits,src | SIGNEXT | stack = src sign extended | > "ae 8,0x80,~"
0xffffffffffffff80 | +| **~/** | src,dst | SIGNED DIV | stack = dst / src (signed) | > "ae 2,-4,~/"
0xfffffffffffffffe | +| **~%** | src,dst | SIGNED MOD | stack = dst % src (signed) | > "ae 2,-5,~%"
0xffffffffffffffff | +| **!** | src | NEG | stack = !src | > "ae 1,!"
0x0
> "ae 4,!"
0x0
> "ae 0,!"
0x1
| +| **++** | src | INC | stack = src++ | > ar r_00=0;ar r_00
0x00000000
> "ae r_00,++"
0x1
> ar r_00
0x00000000
> "ae 1,++"
0x2 | +| **--** | src | DEC | stack = src-- | > ar r_00=5;ar r_00
0x00000005
> "ae r_00,--"
0x4
> ar r_00
0x00000005
> "ae 5,--"
0x4 | +| **=** | src,reg | EQU | reg = src | > "ae 3,r_00,="
> aer r_00
0x00000003
> "ae r_00,r_01,="
> aer r_01
0x00000003 | +| **+=** | src,reg | ADD eq | reg = reg + src | > ar r_01=5;ar r_00=0;ar r_00
0x00000000
> "ae r_01,r_00,+="
> ar r_00
0x00000005
> "ae 5,r_00,+="
> ar r_00
0x0000000a | +| **-=** | src,reg | SUB eq | reg = reg - src | > "ae r_01,r_00,-="
> ar r_00
0x00000004
> "ae 3,r_00,-="
> ar r_00
0x00000001 | +| **\*=** | src,reg | MUL eq | reg = reg * src | > ar r_01=3;ar r_00=5;ar r_00
0x00000005
> "ae r_01,r_00,\*="
> ar r_00
0x0000000f
> "ae 2,r_00,\*="
> ar r_00
0x0000001e | +| **/=** | src,reg | DIV eq | reg = reg / src | > ar r_01=3;ar r_00=6;ar r_00
0x00000006
> "ae r_01,r_00,/="
> ar r_00
0x00000002
> "ae 1,r_00,/="
> ar r_00
0x00000002 | +| **%=** | src,reg | MOD eq | reg = reg % src | > ar r_01=3;ar r_00=7;ar r_00
0x00000007
> "ae r_01,r_00,%="
> ar r_00
0x00000001
> ar r_00=9;ar r_00
0x00000009
> "ae 5,r_00,%="
> ar r_00
0x00000004 | +| **<<=** | src,reg | Shift Left eq | reg = reg << src | > ar r_00=1;ar r_01=1;ar r_01
0x00000001
> "ae r_00,r_01,<<="
> ar r_01
0x00000002
> "ae 2,r_01,<<="
> ar r_01
0x00000008 | +| **>>=** | src,reg | Shift Right eq | reg = reg >> src | > ar r_00=1;ar r_01=8;ar r_01
0x00000008
> "ae r_00,r_01,>>="
> ar r_01
0x00000004
> "ae 2,r_01,>>="
> ar r_01
0x00000001 | +| **&=** | src,reg | AND eq | reg = reg & src | > ar r_00=2;ar r_01=6;ar r_01
0x00000006
> "ae r_00,r_01,&="
> ar r_01
0x00000002
> "ae 2,r_01,&="
> ar r_01
0x00000002
> "ae 1,r_01,&="
> ar r_01
0x00000000 | +| **|=** | src,reg | OR eq | reg = reg | src | > ar r_00=2;ar r_01=1;ar r_01
0x00000001
> "ae r_00,r_01,|="
> ar r_01
0x00000003
> "ae 4,r_01,|="
> ar r_01
0x00000007 | +| **^=** | src,reg | XOR eq | reg = reg ^ src | > ar r_00=2;ar r_01=0xab;ar r_01
0x000000ab
> "ae r_00,r_01,^="
> ar r_01
0x000000a9
> "ae 2,r_01,^="
> ar r_01
0x000000ab | +| **++=** | reg | INC eq | reg = reg + 1 | > ar r_00=4;ar r_00
0x00000004
> "ae r_00,++="
> ar r_00
0x00000005 | +| **--=** | reg | DEC eq | reg = reg - 1 | > ar r_00=4;ar r_00
0x00000004
> "ae r_00,--="
> ar r_00
0x00000003 | +| **!=** | reg | NOT eq | reg = !reg | > ar r_00=4;ar r_00
0x00000004
> "ae r_00,!="
> ar r_00
0x00000000
> "ae r_00,!="
> ar r_00
0x00000001 | +| --- | --- | --- | --- | ---------------------------------------------- | +| =[]
=[\*]
=[1]
=[2]
=[4]
=[8] | src,dst | poke | \*dst=src |
> "ae 0xdeadbeef,0x10000,=[4],"

> pxw 4@0x10000
0x00010000 0xdeadbeef ....

> "ae 0x0,0x10000,=[4],"

> pxw 4@0x10000
0x00010000 0x00000000 | +| []
[\*]
[1]
[2]
[4]
[8] | src | peek | stack=\*src |
> w test@0x10000

> "ae 0x10000,[4],"
0x74736574

> ar r_00=0x10000

> "ae r_00,[4],"
0x74736574 | +| |=[]
|=[1]
|=[2]
|=[4]
|=[8] | reg | nombre | code | >
> | +| SWAP | | Swap | Swap two top elements | SWAP | +| PICK | n | Pick | Pick nth element
from the top of the stack | 2,PICK | +| RPICK | m | Reverse Pick | Pick nth element
from the base of the stack | 0,RPICK | +| DUP | | Duplicate | Duplicate top element in stack | DUP | +| NUM | | Numeric | If top element is a reference
(register name, label, etc),
dereference it and push its real value | NUM | +| CLEAR | | Clear | Clear stack | CLEAR | +| BREAK | | Break | Stops ESIL emulation | BREAK | +| GOTO | n | Goto | Jumps to Nth ESIL word | 5,GOTO | +| TODO | | To Do | Stops execution
(reason: ESIL expression not completed) | TODO | ### ESIL Flags -ESIL VM has an internal state flags that are read-only and can be used to export those values to the underlying target CPU flags. It is because the ESIL VM always calculates all flag changes, while target CPUs only update flags under certain conditions or at specific instructions. +ESIL VM has internal state flags that are read-only and can be used to export those values to the underlying +target CPU flags. It is because the ESIL VM always calculates all flag changes, while target CPUs only update flags +under certain conditions or at specific instructions. Internal flags are prefixed with `$` character. @@ -170,15 +197,21 @@ js - jump target set ``` ## Syntax and Commands + A target opcode is translated into a comma separated list of ESIL expressions. + ``` xor eax, eax -> 0,eax,=,1,zf,= ``` + Memory access is defined by brackets operation: + ``` mov eax, [0x80480] -> 0x80480,[],eax,= ``` + Default operand size is determined by size of operation destination. + ``` movb $0, 0x80480 -> 0,0x80480,=[1] ``` @@ -193,18 +226,20 @@ cmp eax, 123 -> 123,eax,==,$z,zf,= jz eax -> zf,?{,eax,eip,=,} ``` - If you want to run several expressions under a conditional, put them in curly braces: + ``` zf,?{,eip,esp,=[],eax,eip,=,$r,esp,-=,} ``` Whitespaces, newlines and other chars are ignored. So the first thing when processing a ESIL program is to remove spaces: + ``` esil = r_str_replace (esil, " ", "", R_TRUE); ``` -Syscalls need special treatment. They are indicated by '$' at the beginning of an expression. You can pass an optional numeric value to specify a number of syscall. An ESIL emulator must handle syscalls. See (r_esil_syscall). +Syscalls need special treatment. They are indicated by '$' at the beginning of an expression. You can pass +an optional numeric value to specify a number of syscall. An ESIL emulator must handle syscalls. See (r_esil_syscall). 
## Arguments Order for Non-associative Operations @@ -214,17 +249,22 @@ As discussed on IRC, the current implementation works like this: a,b,- b - a a,b,/= b /= a ``` + This approach is more readable, but it is less stack-friendly. ### Special Instructions -NOPs are represented as empty strings. As it was said previously, syscalls are marked by '$' command. For example, '0x80,$'. It delegates emulation from the ESIL machine to a callback which implements syscalls for a specific OS/kernel. +NOPs are represented as empty strings. As it was said previously, syscalls are marked by '$' command. For example, +'0x80,$'. It delegates emulation from the ESIL machine to a callback which implements syscalls for a specific OS/kernel. -Traps are implemented with the `TRAP` command. They are used to throw exceptions for invalid instructions, division by zero, memory read error, or any other needed by specific architectures. +Traps are implemented with the `TRAP` command. They are used to throw exceptions for invalid instructions, +division by zero, memory read error, or any other needed by specific architectures. ### Quick Analysis -Here is a list of some quick checks to retrieve information from an ESIL string. Relevant information will be probably found in the first expression of the list. +Here is a list of some quick checks to retrieve information from an ESIL string. Relevant information will be probably +found in the first expression of the list. + ``` indexOf('[') -> have memory references indexOf("=[") -> write in memory @@ -252,13 +292,15 @@ Common operations: ### CPU Flags -CPU flags are usually defined as single bit registers in the RReg profile. They are sometimes found under the 'flg' register type. +CPU flags are usually defined as single bit registers in the RReg profile. They are sometimes found under +the 'flg' register type. ### Variables Properties of the VM variables: -1. They have no predefined bit width. 
This way it should be easy to extend them to 128, 256 and 512 bits later, e.g. for MMX, SSE, AVX, Neon SIMD. +1. They have no predefined bit width. This way it should be easy to extend them to 128, 256 and 512 bits later, + for MMX, SSE, AVX, Neon SIMD. 2. There can be unbound number of variables. It is done for SSA-form compatibility. @@ -294,11 +336,14 @@ What to do with them? What about bit arithmetic if use variables instead of regi ### Floating Point Unit Support -At the moment of this writing, ESIL does not yet support FPU. But you can implement support for unsupported instructions using rz-pipe. Eventually we will get proper support for multimedia and floating point. +At the moment of this writing, ESIL does not yet support FPU. But you can implement support for unsupported +instructions using rz-pipe. Eventually we will get proper support for multimedia and floating point. ### Handling x86 REP Prefix in ESIL -ESIL specifies that the parsing control-flow commands must be uppercase. Bear in mind that some architectures have uppercase register names. The corresponding register profile should take care not to reuse any of the following: +ESIL specifies that the parsing control-flow commands must be uppercase. Bear in mind that some architectures have +uppercase register names. The corresponding register profile should take care not to reuse any of the following: + ``` 3,SKIP - skip N instructions. used to make relative forward GOTOs 3,GOTO - goto instruction 3 @@ -311,13 +356,15 @@ CLEAR - clear stack #### Usage Example: rep cmpsb + ``` cx,!,?{,BREAK,},esi,[1],edi,[1],==,?{,BREAK,},esi,++,edi,++,cx,--,0,GOTO ``` ### Unimplemented/Unhandled Instructions -Those are expressed with the 'TODO' command. They act as a 'BREAK', but displays a warning message describing that an instruction is not implemented and will not be emulated. For example: +Those are expressed with the 'TODO' command. 
They act as a 'BREAK', but display a warning message describing that +an instruction is not implemented and will not be emulated. For example: ``` fmulp ST(1), ST(0) => TODO,fmulp ST(1),ST(0) @@ -358,14 +405,17 @@ fmulp ST(1), ST(0) => TODO,fmulp ST(1),ST(0) ### Introspection -To ease ESIL parsing we should have a way to express introspection expressions to extract the data that we want. For example, we may want to get the target address of a jump. The parser for ESIL expressions should offer an API to make it possible to extract information by analyzing the expressions easily. +To ease ESIL parsing we should have a way to express introspection expressions to extract the data that we want. +For example, we may want to get the target address of a jump. The parser for ESIL expressions should offer an API +to make it possible to extract information by analyzing the expressions easily. ``` > ao~esil,opcode opcode: jmp 0x10000465a esil: 0x10000465a,rip,= ``` -We need a way to retrieve the numeric value of 'rip'. This is a very simple example, but there are more complex, like conditional ones. We need expressions to be able to get: +We need a way to retrieve the numeric value of 'rip'. This is a very simple example, but there are more complex, +like conditional ones. We need expressions to be able to get: - opcode type - destination of a jump @@ -375,18 +425,24 @@ We need a way to retrieve the numeric value of 'rip'. This is a very simple exam ### API HOOKS -It is important for emulation to be able to setup hooks in the parser, so we can extend it to implement analysis without having to change it again and again. That is, every time an operation is about to be executed, a user hook is called. It can be used for example to determine if `RIP` is going to change, or if the instruction updates the stack. 
-Later, we can split that callback into several ones to have an event-based analysis API that may be extended in JavaScript like this: +It is important for emulation to be able to setup hooks in the parser, so we can extend it to implement analysis +without having to change it again and again. That is, every time an operation is about to be executed, +a user hook is called. It can be used for example to determine if `RIP` is going to change, or if the instruction +updates the stack. Later, we can split that callback into several ones to have an event-based analysis API that +may be extended in JavaScript like this: ``` esil.on('regset', function(){.. esil.on('syscall', function(){esil.regset('rip' ``` -For the API, see the functions `hook_flag_read()`, `hook_execute()` and `hook_mem_read()`. A callback should return true or 1 if you want to override the action that it takes. For example, to deny memory reads in a region, or voiding memory writes, effectively making it read-only. -Return false or 0 if you want to trace ESIL expression parsing. +For the API, see the functions `hook_flag_read()`, `hook_execute()` and `hook_mem_read()`. A callback should return +true or 1 if you want to override the action that it takes. For example, to deny memory reads in a region, +or voiding memory writes, effectively making it read-only. Return false or 0 if you want to trace ESIL expression +parsing. -Other operations require bindings to external functionalities to work. In this case, `rz_ref` and `rz_io`. This must be defined when initializing the ESIL VM. +Other operations require bindings to external functionalities to work. In this case, `rz_ref` and `rz_io`. +This must be defined when initializing the ESIL VM. 
* Io Get/Set ``` diff --git a/src/disassembling/intro.md b/src/disassembling/intro.md index 21eb95d4..59776940 100644 --- a/src/disassembling/intro.md +++ b/src/disassembling/intro.md @@ -1,28 +1,27 @@ # Disassembling -Disassembling in rizin is just a way to represent an array of bytes. It is handled as a special print mode within `p` command. - -In the old times, when the rizin core was smaller, the disassembler was handled by an external rsc file. That is, rizin first dumped current block into a file, and then simply called `objdump` configured to disassemble for Intel, ARM or other supported architectures. - -It was a working and unix friendly solution, but it was inefficient as it repeated the same expensive actions over and over, because there were no caches. As a result, scrolling was terribly slow. - -So there was a need to create a generic disassembler library to support multiple plugins for different architectures. We can list the current loaded plugins with +Disassembling in Rizin is just a way to represent an array of bytes. It is handled as a special print mode within +`p` command. ``` $ rz-asm -L ``` -Or from inside rizin: +Or from inside Rizin: ``` > e asm.arch=?? ``` -This was many years before capstone appeared. So rizin was using udis86 and olly disassemblers, many gnu (from binutils). +This was many years before capstone appeared. So Rizin was using udis86 and olly disassemblers, +many gnu (from binutils). -Nowadays, the disassembler support is one of the basic features of rizin. It now has many options, endianness, including target architecture flavor and disassembler variants, among other things. +Nowadays, the disassembler support is one of the basic features of Rizin. It now has many options, endianness, +including target architecture flavor and disassembler variants, among other things. -To see the disassembly, use the `pd` command. It accepts a numeric argument to specify how many opcodes of current block you want to see. 
Most of the commands in rizin consider the current block size as the default limit for data input. If you want to disassemble more bytes, set a new block size using the `b` command. +To see the disassembly, use the `pd` command. It accepts a numeric argument to specify how many opcodes +of current block you want to see. Most of the commands in Rizin consider the current block size as the default limit +for data input. If you want to disassemble more bytes, set a new block size using the `b` command. ``` [0x00000000]> b 100 ; set block size to 100 @@ -30,7 +29,8 @@ To see the disassembly, use the `pd` command. It accepts a numeric argument to s [0x00000000]> pd 3 ; disassemble 3 opcodes [0x00000000]> pD 30 ; disassemble 30 bytes ``` -You can also pass negative numbers as the numeric argument, if you want to disassemble something that lies before the current offset: +You can also pass negative numbers as the numeric argument, if you want to disassemble something that lies before +the current offset: ``` [0x00005bc0]> pd -2 @@ -44,9 +44,11 @@ You can also pass negative numbers as the numeric argument, if you want to disas The `pD` command works like `pd` but accepts the number of input bytes as its argument, instead of the number of opcodes. -You can also get information about the pointer chains using the command `pdp`. This can be helpful while dealing with ROP chains. +You can also get information about the pointer chains using the command `pdp`. This can be helpful while dealing +with ROP chains. -The "pseudo" syntax may be somewhat easier for a human to understand than the default assembler notations. But it can become annoying if you read lots of code. To play with it: +The "pseudo" syntax may be somewhat easier for a human to understand than the default assembler notations. +But it can become annoying if you read lots of code. 
To play with it: ``` [0x00405e1c]> e asm.pseudo=true @@ -71,4 +73,24 @@ The "pseudo" syntax may be somewhat easier for a human to understand than the de 0x00405e2d 4889d8 mov %rbx, %rax ``` -And as always, you can print the disassembly in JSON using `pdj` and get more information about the other associated commands by running `pd?`. +And as always, you can print the disassembly in JSON using `pdj` and get more information about the other associated +commands by running `pd?`: + +``` +[0x00000000]> pd? +Usage: pd[?] # Print Disassembly +| pd[jqt] [] # Disassemble N instructions (can be negative) +| pda[jq=] # Disassemble all possible opcodes (byte per byte) +| pdb[jJ] # Disassemble basic block +| pdC [] # Prints the comments found in N instructions +| pde[jqQ] [] # Disassemble N instructions following execution flow from current PC +| pdf[js] # Disassemble a function +| pdJ[?] [] # Disassemble N instructions as json containing the printed text +| pdk # Disassemble all methods of a class +| pdl[j] [] # Disassemble N instructions and prints its sizes +| pdp[jq] [] # Disassemble instructions and follows pointers to read ropchains +| pdr[j.] # Disassemble recursively across the function graph +| pdR[jq] # Disassemble recursively the block size bytes without analyzing functions +| pds[fb] # Summarize N bytes or current block or a function (strings, calls, jumps, refs) +| pdg[?] # Native Ghidra decompiler and Sleigh Disassembler plugin +``` diff --git a/src/plugins/debug.md b/src/plugins/debug.md index 86654a71..5260aa43 100644 --- a/src/plugins/debug.md +++ b/src/plugins/debug.md @@ -2,7 +2,8 @@ It is common to have an issues when you write a plugin, especially if you do this for the first time. This is why debugging them is very important. 
The first step for debugging is -to set an environment variable when running rizin instance: +to set an environment variable when running Rizin instance: + ``` R_DEBUG=yes rizin /bin/ls Loading /usr/local/lib/rizin/2.2.0-git//bin_xtr_dyldcache.so diff --git a/src/plugins/dev-analysis.md b/src/plugins/dev-analysis.md index 8068f840..85fe54be 100644 --- a/src/plugins/dev-analysis.md +++ b/src/plugins/dev-analysis.md @@ -2,10 +2,10 @@ After implementing disassembly plugin, you might have noticed that output is far from being good - no proper highlighting, no reference lines -and so on. This is because rizin requires every architecture plugin +and so on. This is because Rizin requires every architecture plugin to provide also analysis information about every opcode. At the moment the implementation of disassembly and opcodes analysis is separated between -two modules - RzAsm and RzAnalysis. Thus we need to write an analysis plugin too. +two modules - RzAsm and RzAnalysis. Thus, we need to write an analysis plugin too. The principle is very similar - you just need to create a C file and corresponding Makefile. @@ -57,7 +57,8 @@ uninstall: ``` **analysis_mycpu.c:** -This is a dummy example please go check real life examples [in the source](https://github.com/rizinorg/rizin/blob/dev/librz/analysis/p/analysis_snes.c). +This is a dummy example please go check real life examples +[in the source](https://github.com/rizinorg/rizin/blob/dev/librz/analysis/p/analysis_snes.c). ```c /* rizin - LGPL - Copyright 2022 - user */ @@ -122,10 +123,13 @@ RZ_API RzLibStruct rizin_plugin = { }; #endif ``` + After compiling rizin will list this plugin in the rz-asm output: + ``` _dA_ _8_16 mycpu LGPL3 MYCPU disassembly plugin ``` + Note the `A` just appeared on the left column (a=asm, d=disasm, A=analyze, e=ESIL). 
Examples: diff --git a/src/plugins/dev-asm.md b/src/plugins/dev-asm.md index 773ddeb0..120a3458 100644 --- a/src/plugins/dev-asm.md +++ b/src/plugins/dev-asm.md @@ -5,6 +5,7 @@ are fluent in C. For various reasons it might be easier to implement it out of t will need to create single C file, called `asm_mycpu.c` and a meson file for it. The key thing of RzAsm plugin is a structure + ```c RzAsmPlugin rz_asm_plugin_mycpu = { .name = "mycpu", @@ -86,6 +87,7 @@ RZ_API RzLibStruct rizin_plugin = { ``` After compiling rizin will list this plugin in the rz-asm output: + ``` $ rz-asm -L |grep myc _d__ _8_32 mycpu LGPL3 MYCPU disassembly plugin @@ -93,12 +95,14 @@ _d__ _8_32 mycpu LGPL3 MYCPU disassembly plugin ### Moving plugin into the tree -Pushing a new architecture into the main branch of rizin requires to modify several files in order to make it fit into the way the rest of plugins are built. +Pushing a new architecture into the main branch of rizin requires to modify several files in order to make it fit +into the way the rest of plugins are built. __List of affected files:__ * `librz/asm/p/asm_mycpu.c` -That's where most of our code will be, the key part is to declare a `RzAsmPlugin` containing a valid `disassemble` field, a function pointer to the actual disassembler function. +That's where most of our code will be, the key part is to declare a `RzAsmPlugin` containing a valid `disassemble` +field, a function pointer to the actual disassembler function. 
* `librz/asm/meson.build` The build is handled by meson, we have to add our plugin to the list of things to be compiled : @@ -127,7 +131,7 @@ The build is handled by meson, we have to add our plugin to the list of things t ``` * `librz/include/rz_asm.h` -Make Rizin aware of our plugin by defining our struct : +Make Rizin aware of our plugin by defining our struct: ```diff @@ -265,6 +265,7 @@ extern RzAsmPlugin rz_asm_plugin_xcore_cs; extern RzAsmPlugin rz_asm_plugin_xtensa; diff --git a/src/plugins/dev-other.md b/src/plugins/dev-other.md index 947110c5..a939d6c2 100644 --- a/src/plugins/dev-other.md +++ b/src/plugins/dev-other.md @@ -4,9 +4,10 @@ * Adding the registers profile and architecture support in the librz/debug/p/debug_native.c and librz/debug/p/debug_gdb.c * Add the code to apply the profiles into the function `rz_debug_gdb_attach(RzDebug *dbg, int pid)` -If you want to add support for the gdb, you can see the register profile in the active gdb session using command `maint print registers`. +If you want to add support for the gdb, you can see the register profile in the active gdb session using command +`maint print registers`. -## More to come.. +## More to come... * Related article: http://rizin.today/posts/extending-r2-with-new-plugins/ @@ -27,6 +28,6 @@ Some commits related to "Implementing a new architecture" ## Implementing a new pseudo architecture -This is an simple plugin for z80 that you may use as example: +This is a simple plugin for z80 that you may use as example: -https://github.com/rizinorg/rizin/commit/8ff6a92f65331cf8ad74cd0f44a60c258b137a06 +* https://github.com/rizinorg/rizin/commit/8ff6a92f65331cf8ad74cd0f44a60c258b137a06 diff --git a/src/plugins/dev.md b/src/plugins/dev.md index fcdc97fb..b2e86956 100644 --- a/src/plugins/dev.md +++ b/src/plugins/dev.md @@ -1,6 +1,7 @@ ## Implementing a new architecture -rizin splits the logic of a CPU into several modules. 
You should write more than one plugin to get full support for a specific arch. Let's see which are those: +Rizin splits the logic of a CPU into several modules. You should write more than one plugin to get full support +for a specific arch. Let's see which are those: * rz_asm : assembler and disassembler * rz_analysis : code analysis (opcode,type,esil,..) @@ -8,17 +9,21 @@ rizin splits the logic of a CPU into several modules. You should write more than * rz_syscall : system calls * rz_debug : debugger -The most basic feature you usually want to support from a specific architecture is the disassembler. You first need to read into a human readable form the bytes in there. +The most basic feature you usually want to support from a specific architecture is the disassembler. You first need to +read into a human-readable form the bytes in there. -Bear in mind that plugins can be compiled static or dynamically, this means that the arch will be embedded inside the core libraries or it will distributed as a separated shared library. +Bear in mind that plugins can be compiled static or dynamically, this means that the arch will be embedded inside +the core libraries, or it will be distributed as a separate shared library. You may find some examples of external plugins in [rizin-extras](https://github.com/rizinorg/rizin-extras) repository. ## Writing the rz_asm plugin -The official way to make third-party plugins is to distribute them into a separate repository. 
This is a sample +disasm plugin: `meson.build` file: + ```meson project('rizin-mycpu', 'c') @@ -35,6 +40,7 @@ library('asm_mycpu', ['mycpu.c'], ``` `mycpu.c` file: + ```c /* example rz_asm plugin by pancake at 2014 */ diff --git a/src/plugins/intro.md b/src/plugins/intro.md index 92a0773e..e7573120 100644 --- a/src/plugins/intro.md +++ b/src/plugins/intro.md @@ -1,6 +1,6 @@ # Plugins -rizin is implemented on top of a bunch of libraries, almost every of those +Rizin is implemented on top of a bunch of libraries, almost every of those libraries support plugins to extend the capabilities of the library or add support for different targets. @@ -25,18 +25,21 @@ reg # arch register logic ## Listing plugins -Some rizin tools have the `-L` flag to list all the plugins associated to the +Some Rizin tools have the `-L` flag to list all the plugins associated to the functionality. + ``` rz-asm -L # list asm plugins rizin -L # list io plugins rz-bin -L # list bin plugins rz-hash -L # list hash/crypto/encoding plugins ``` -There are more plugins in rizin ecosystem, we can list them from inside rizin, and this is + +There are more plugins in Rizin ecosystem, we can list them from inside Rizin, and this is done by using the `L` suffix. Those are some of the commands: + ``` L # list core plugins iL # list bin plugins @@ -51,7 +54,8 @@ You can use the `?` as value to get the possible values in the associated eval v e asm.arch=? # list assembler/disassembler plugins e analysis.arch=? # list analysis plugins ``` + ## Notes -Note there are some inconsistencies that most likely will be fixed in the future rizin versions. +Note there are some inconsistencies that most likely will be fixed in the future Rizin versions. 
diff --git a/src/plugins/ioplugins.md b/src/plugins/ioplugins.md index 05b7af82..d07fa89a 100644 --- a/src/plugins/ioplugins.md +++ b/src/plugins/ioplugins.md @@ -1,21 +1,27 @@ # IO plugins -All access to files, network, debugger and all input/output in general is wrapped by an IO abstraction layer that allows rizin to treat all data as if it were just a file. +All access to files, network, debugger and all input/output in general is wrapped by an IO abstraction layer that +allows Rizin to treat all data as if it were just a file. -IO plugins are the ones used to wrap the open, read, write and 'system' on virtual file systems. You can make rizin understand anything as a plain file. E.g. a socket connection, a remote rizin session, a file, a process, a device, a gdb session. +IO plugins are the ones used to wrap the open, read, write and 'system' on virtual file systems. You can make Rizin +understand anything as a plain file. E.g. a socket connection, a remote rizin session, a file, a process, a device, +a gdb session. -So, when rizin reads a block of bytes, it is the task of an IO plugin to get these bytes from any place and put them into internal buffer. An IO plugin is chosen by a file's URI to be opened. Some examples: +So, when rizin reads a block of bytes, it is the task of an IO plugin to get these bytes from any place and put them +into internal buffer. An IO plugin is chosen by a file's URI to be opened. Some examples: * Debugging URIs ``` $ rizin dbg:///bin/ls
$ rizin pid://1927 ``` + * Remote sessions ``` $ rizin rap://:1234
$ rizin rap://:1234//bin/ls ``` + * Virtual buffers ``` $ rizin malloc://512
diff --git a/src/plugins/python.md b/src/plugins/python.md index 59ac1df5..c6394dff 100644 --- a/src/plugins/python.md +++ b/src/plugins/python.md @@ -4,105 +4,110 @@ Note - in the following examples there are missing functions of the actual decod for the sake of readability! For this you need to do this: + 1. `import rzlang` and `from rzlang import RZ` (for constants) 2. Make a function with 2 subfunctions - `assemble` and `disassemble` and returning plugin structure - for RzAsm plugin -```python -def mycpu(a): - def assemble(s): - return [1, 2, 3, 4] - - def disassemble(memview, addr): - try: - opcode = get_opcode(memview) # https://docs.python.org/3/library/stdtypes.html#memoryview - opstr = optbl[opcode][1] - return [4, opstr] - except: - return [4, "unknown"] -``` + ```python + def mycpu(a): + def assemble(s): + return [1, 2, 3, 4] + + def disassemble(memview, addr): + try: + opcode = get_opcode(memview) # https://docs.python.org/3/library/stdtypes.html#memoryview + opstr = optbl[opcode][1] + return [4, opstr] + except: + return [4, "unknown"] + ``` 3. This structure should contain a pointers to these 2 functions - `assemble` and `disassemble` -```python - return { - "name" : "mycpu", - "arch" : "mycpu", - "bits" : 32, - "endian" : RZ.RZ_SYS_ENDIAN_LITTLE, - "license" : "GPL", - "desc" : "MYCPU disasm", - "assemble" : assemble, - "disassemble" : disassemble, - } -``` + ```python + return { + "name" : "mycpu", + "arch" : "mycpu", + "bits" : 32, + "endian" : RZ.RZ_SYS_ENDIAN_LITTLE, + "license" : "GPL", + "desc" : "MYCPU disasm", + "assemble" : assemble, + "disassemble" : disassemble, + } + ``` + 4. 
Make a function with 2 subfunctions - `set_reg_profile` and `op` and returning plugin structure - for RzAnalysis plugin -```python -def mycpu_analysis(a): - def set_reg_profile(): - profile = "=PC pc\n" + \ - "=SP sp\n" + \ - "gpr r0 .32 0 0\n" + \ - "gpr r1 .32 4 0\n" + \ - "gpr r2 .32 8 0\n" + \ - "gpr r3 .32 12 0\n" + \ - "gpr r4 .32 16 0\n" + \ - "gpr r5 .32 20 0\n" + \ - "gpr sp .32 24 0\n" + \ - "gpr pc .32 28 0\n" - return profile - - def op(memview, pc): - analysisop = { - "type" : RZ.RZ_ANALYSIS_OP_TYPE_NULL, - "cycles" : 0, - "stackop" : 0, - "stackptr" : 0, - "ptr" : -1, - "jump" : -1, - "addr" : 0, - "eob" : False, - "esil" : "", - } - try: - opcode = get_opcode(memview) # https://docs.python.org/3/library/stdtypes.html#memoryview - esilstr = optbl[opcode][2] - if optbl[opcode][0] == "J": # it's jump - analysisop["type"] = RZ.RZ_ANALYSIS_OP_TYPE_JMP - analysisop["jump"] = decode_jump(opcode, j_mask) - esilstr = jump_esil(esilstr, opcode, j_mask) - - except: - result = analysisop - # Don't forget to return proper instruction size! 
- return [4, result] + ```python + def mycpu_analysis(a): + def set_reg_profile(): + profile = "=PC pc\n" + \ + "=SP sp\n" + \ + "gpr r0 .32 0 0\n" + \ + "gpr r1 .32 4 0\n" + \ + "gpr r2 .32 8 0\n" + \ + "gpr r3 .32 12 0\n" + \ + "gpr r4 .32 16 0\n" + \ + "gpr r5 .32 20 0\n" + \ + "gpr sp .32 24 0\n" + \ + "gpr pc .32 28 0\n" + return profile + + def op(memview, pc): + analysisop = { + "type" : RZ.RZ_ANALYSIS_OP_TYPE_NULL, + "cycles" : 0, + "stackop" : 0, + "stackptr" : 0, + "ptr" : -1, + "jump" : -1, + "addr" : 0, + "eob" : False, + "esil" : "", + } + try: + opcode = get_opcode(memview) # https://docs.python.org/3/library/stdtypes.html#memoryview + esilstr = optbl[opcode][2] + if optbl[opcode][0] == "J": # it's jump + analysisop["type"] = RZ.RZ_ANALYSIS_OP_TYPE_JMP + analysisop["jump"] = decode_jump(opcode, j_mask) + esilstr = jump_esil(esilstr, opcode, j_mask) + + except: + result = analysisop + # Don't forget to return proper instruction size! + return [4, result] + ``` -``` 5. This structure should contain a pointers to these 2 functions - `set_reg_profile` and `op` -```python - return { - "name" : "mycpu", - "arch" : "mycpu", - "bits" : 32, - "license" : "GPL", - "desc" : "MYCPU analysis", - "esil" : 1, - "set_reg_profile" : set_reg_profile, - "op" : op, - } -``` + ```python + return { + "name" : "mycpu", + "arch" : "mycpu", + "bits" : 32, + "license" : "GPL", + "desc" : "MYCPU analysis", + "esil" : 1, + "set_reg_profile" : set_reg_profile, + "op" : op, + } + ``` + 6. 
Then register those using `rzlang.plugin("asm")` and `rzlang.plugin("analysis")` respectively -```python -print("Registering MYCPU disasm plugin...") -print(rzlang.plugin("asm", mycpu)) -print("Registering MYCPU analysis plugin...") -print(rzlang.plugin("analysis", mycpu_analysis)) -``` + ```python + print("Registering MYCPU disasm plugin...") + print(rzlang.plugin("asm", mycpu)) + print("Registering MYCPU analysis plugin...") + print(rzlang.plugin("analysis", mycpu_analysis)) + ``` You can combine everything in one file and load it using `-i` option: + ``` rizin -I mycpu.py some_file.bin ``` + Or you can load it from the rizin shell: `#!python mycpu.py` ### Implementing new format plugin in Python @@ -112,6 +117,7 @@ for the sake of readability! For this you need to do this: 1. `import rzlang` + 2. Make a function with subfunctions: - `load` - `load_bytes` @@ -126,68 +132,72 @@ For this you need to do this: - `info` and returning plugin structure - for RzAsm plugin -```python -def le_format(a): - def load(binf): - return [0] - - def check_bytes(buf): - try: - if buf[0] == 77 and buf[1] == 90: - lx_off, = struct.unpack(": ``` -Note that the following command does the same, rizin will use the debug plugin specified by the uri if found. +Note that the following command does the same, Rizin will use the debug plugin specified by the uri if found. ``` $ rizin -D gdb gdb://: @@ -30,9 +30,9 @@ which rebases the current session's data after opening gdb [0x00404870]> doof gdb://:/ ``` -After connecting, you can use the standard rizin debug commands as normal. +After connecting, you can use the standard Rizin debug commands as normal. -rizin does not yet load symbols from gdbserver, so it needs the binary to +Rizin does not yet load symbols from gdbserver, so it needs the binary to be locally present to load symbols from it. 
In case symbols are not loaded even if the binary is present, you can try specifying the path with `e dbg.exe.path`: @@ -92,15 +92,15 @@ Use `R!rd` to print the currently available reverse debugging capabilities. If you are interested in debugging rizin's interaction with gdbserver you can use `R!monitor set remote-debug 1` to turn on logging of gdb's remote protocol packets in gdbserver's console and `R!monitor set debug 1` to show general debug messages from -gdbserver in it's console. +gdbserver in its console. rizin also provides its own gdbserver implementation: ``` $ rizin = [0x00000000]> Rg? -|Usage: =[g] [...] # gdb server -| gdbserver: +Usage: R[g] [...] # gdb server +gdbserver: | Rg port file [args] listen on 'port' debugging 'file' using gdbserver | Rg! port file [args] same as above, but debug protocol messages (like gdbserver --remote-debug) ``` @@ -117,4 +117,3 @@ And then connect to it like you would to any gdbserver. For example, with rizin: ``` $ rizin -d gdb://localhost:8000 ``` - diff --git a/src/remote_access/remoting_capabilities.md b/src/remote_access/remoting_capabilities.md index 963f89ee..33a6f7cc 100644 --- a/src/remote_access/remoting_capabilities.md +++ b/src/remote_access/remoting_capabilities.md @@ -1,7 +1,8 @@ # Remote Access Capabilities -Rizin can be run locally, or it can be started as a server process which is controlled by a local -rizin process. This is possible because everything uses rizin's IO subsystem which abstracts access to system(), cmd() and all basic IO operations so to work over a network. +Rizin can be run locally, or it can be started as a server process which is controlled by a local rizin process. +This is possible because everything uses Rizin's IO subsystem which abstracts access to system(), cmd() and all +basic IO operations so to work over a network. Help for commands useful for remote access to rizin: @@ -21,7 +22,7 @@ Usage: R[?] # Connect with other instances of rizin | Rh[?] 
# Start the http webserver | RH[?] # Start the http webserver (and launch the web browser) | Rt <[host:]port> [] # Start the tcp server -| R&r # Start rap server in background (same as '&_=h') +| R&r # Start rap server in background (same as '& Rr') ``` You can learn rizin remote capabilities by displaying the list of supported IO plugins: `rizin -L`. @@ -89,7 +90,8 @@ To remove hosts (and close connections): [0x004048c5]> R- ``` -You can also redirect rizin output to a TCP or UDP server (such as `nc -l`). First, Add the server with 'R+ tcp://' or 'R+ udp://', then you can redirect the output of a command to be sent to the server: +You can also redirect rizin output to a TCP or UDP server (such as `nc -l`). First, add the server with `R+ tcp://` or +`R+ udp://`, then you can redirect the output of a command to be sent to the server: ``` [0x004048c5]> R+ tcp://:/ @@ -98,5 +100,5 @@ Connected to: at port [0x004048c5]> R<5 cmd... ``` -The `R<` command will send the output from the execution of `cmd` to the remote connection number N (or the last one used if no id specified). - +The `R<` command will send the output from the execution of `cmd` to the remote connection number N +(or the last one used if no id specified). diff --git a/src/remote_access/windbg.md b/src/remote_access/windbg.md index fded00a4..13b0f1a6 100644 --- a/src/remote_access/windbg.md +++ b/src/remote_access/windbg.md @@ -1,6 +1,6 @@ # WinDBG Kernel-mode Debugging (KD) -The WinDBG KD interface support for rizin allows you to attach to VM running +The WinDBG KD interface support for Rizin allows you to attach to a VM running Windows and debug its kernel over a serial port or network. It is also possible to use the remote GDB interface to connect and @@ -11,9 +11,11 @@ just an initial implementation which will get better in time. 
## Setting Up KD on Windows -> For a complete walkthrough, refer to Microsoft's [documentation](https://docs.microsoft.com/en-us/windows-hardware/drivers/debugger/setting-up-kernel-mode-debugging-in-windbg--cdb--or-ntsd). +> For a complete walkthrough, refer to Microsoft's +> [documentation](https://docs.microsoft.com/en-us/windows-hardware/drivers/debugger/setting-up-kernel-mode-debugging-in-windbg--cdb--or-ntsd). ### Serial Port + Enable KD over a serial port on Windows Vista and higher like this: ``` @@ -22,7 +24,9 @@ bcdedit /dbgsettings serial debugport:1 baudrate:115200 ``` Or like this for Windows XP: - Open boot.ini and add /debug /debugport=COM1 /baudrate=115200: + +Open boot.ini and add /debug /debugport=COM1 /baudrate=115200: + ``` [boot loader] timeout=30 @@ -30,27 +34,33 @@ default=multi(0)disk(0)rdisk(0)partition(1)\WINDOWS [operating systems] multi(0)disk(0)rdisk(0)partition(1)\WINDOWS="Debugging with Cable" /fastdetect /debug /debugport=COM1 /baudrate=57600 ``` + In case of VMWare + ``` - Virtual Machine Settings -> Add -> Serial Port - Device Status: - [v] Connect at power on - Connection: - [v] Use socket (named pipe) - [_/tmp/winkd.pipe________] - From: Server To: Virtual Machine +Virtual Machine Settings -> Add -> Serial Port +Device Status: +[v] Connect at power on +Connection: +[v] Use socket (named pipe) +[_/tmp/winkd.pipe________] +From: Server To: Virtual Machine ``` + Configure the VirtualBox Machine like this: + ``` - Preferences -> Serial Ports -> Port 1 +Preferences -> Serial Ports -> Port 1 - [v] Enable Serial Port - Port Number: [_COM1_______[v]] - Port Mode: [_Host_Pipe__[v]] - [v] Create Pipe - Port/File Path: [_/tmp/winkd.pipe____] +[v] Enable Serial Port +Port Number: [_COM1_______[v]] +Port Mode: [_Host_Pipe__[v]] + [v] Create Pipe +Port/File Path: [_/tmp/winkd.pipe____] ``` + Or just spawn the VM with qemu like this: + ``` $ qemu-system-x86_64 -chardev socket,id=serial0,\ path=/tmp/winkd.pipe,nowait,server \ @@ -58,14 +68,18 @@ 
$ qemu-system-x86_64 -chardev socket,id=serial0,\ ``` ### Network + Enable KD over network (KDNet) on Windows 7 or later likes this: + ``` bcdedit /debug on bcdedit /dbgsettings net hostip:w.x.y.z port:n ``` + Starting from Windows 8 there is no way to enforce debugging for every boot, but it is possible to always show the advanced boot options, which allows to enable kernel debugging: + ``` bcedit /set {globalsettings} advancedoptions true ``` @@ -73,26 +87,31 @@ bcedit /set {globalsettings} advancedoptions true ## Connecting to KD interface on rizin ### Serial Port + Rizin will use the `winkd` io plugin to connect to a socket file created by virtualbox or qemu. Also, the `winkd` debugger plugin and we should specify the x86-32 too. (32 and 64 bit debugging is supported) + ``` $ rizin -a x86 -b 32 -D winkd winkd:///tmp/winkd.pipe ``` On Windows you should run the following line: + ``` $ rizin -D winkd winkd://\\.\pipe\com_1 ``` ### Network + ``` $ rizin -a x86 -b 32 -d winkd://::w.x.y.z ``` ## Using KD -When connecting to a KD interface, rizin will send a breakin packet to interrupt -the target and we will get stuck here: + +When connecting to a KD interface, Rizin will send a breakin packet to interrupt +the target, and we will get stuck here: ``` [0x828997b8]> pd 20 ;-- eip: @@ -105,12 +124,14 @@ the target and we will get stuck here: ``` In order to skip that trap we will need to change eip and run 'dc' twice: + ``` dr eip=eip+1 dc dr eip=eip+1 dc ``` + Now the Windows VM will be interactive again. We will need to kill rizin and attach again to get back to control the kernel. @@ -120,30 +141,31 @@ address of the process in the physical memory layout. # WinDBG Backend for Windows (DbgEng) -On Windows, rizin can use `DbgEng.dll` as a debugging backend, +On Windows, Rizin can use `DbgEng.dll` as a debugging backend, allowing it to make use of WinDBG's capabilities, supporting dump files, local and remote user and kernel mode debugging. 
-You can use the debugging DLLs included on Windows or get the latest version from Microsoft's [download page](https://docs.microsoft.com/en-us/windows-hardware/drivers/debugger/debugger-download-tools) (recommended). - -> You cannot use DLLs from the Microsoft Store's `WinDbg Preview` app folder directly as they are not marked as executable for normal users. +You can use the debugging DLLs included on Windows or get the latest version from Microsoft's +[download page](https://docs.microsoft.com/en-us/windows-hardware/drivers/debugger/debugger-download-tools) (recommended). -> rizin will try to load `dbgeng.dll` from the `_NT_DEBUGGER_EXTENSION_PATH` environment variable before using Windows' default library search path. +> You cannot use DLLs from the Microsoft Store's `WinDbg Preview` app folder directly as they are not marked +> as executable for normal users. +> rizin will try to load `dbgeng.dll` from the `_NT_DEBUGGER_EXTENSION_PATH` environment variable before using +> Windows' default library search path. 
## Using the plugin -To use the `windbg` plugin, pass the same command-line options as you would for `WinDBG` or `kd` (see Microsoft's [documentation](https://docs.microsoft.com/en-us/windows-hardware/drivers/debugger/windbg-command-line-options)), quoting/escaping when necessary: +To use the `windbg` plugin, pass the same command-line options as you would for `WinDBG` or `kd` +(see Microsoft's [documentation](https://docs.microsoft.com/en-us/windows-hardware/drivers/debugger/windbg-command-line-options)), +quoting/escaping when necessary: ``` > rizin -d "windbg://-remote tcp:server=Server,port=Socket" -``` -``` + > rizin -d "windbg://MyProgram.exe \"my arg\"" -``` -``` + > rizin -d "windbg://-k net:port=,key=" -``` -``` + > rizin -d "windbg://-z MyDumpFile.dmp" ``` diff --git a/src/scripting/intro.md b/src/scripting/intro.md index 6460a343..b71bf69d 100644 --- a/src/scripting/intro.md +++ b/src/scripting/intro.md @@ -6,6 +6,7 @@ scripts/another programs via IPC (Inter-Process Communication), called rz-pipe. As mentioned a few times before there is an ability to sequence commands using `;` semicolon operator. + ``` [0x00404800]> pd 1 ; ao 1 0x00404800 b827e66100 mov eax, 0x61e627 ; "tab" @@ -22,19 +23,25 @@ stack: null family: cpu [0x00404800]> ``` + It simply runs the second command after finishing the first one, like in a shell. The second important way to sequence the commands is with a simple pipe `|` + ``` ao|grep address ``` + Note, the `|` pipe only can pipe output of rizin commands to external (shell) commands, like system programs or builtin shell commands. -There is a similar way to sequence rizin commands, using the backtick operator `` `command` ``. The quoted part will undergo command substitution and the output will be used as an argument of the command line. + +There is a similar way to sequence rizin commands, using the backtick operator `` `command` ``. 
The quoted part will +undergo command substitution and the output will be used as an argument of the command line. For example, we want to see a few bytes of the memory at the address referred to by the 'mov eax, addr' instruction. We can do that without jumping to it, using a sequence of commands: + ``` [0x00404800]> pd 1 0x00404800 b827e66100 mov eax, 0x61e627 ; "tab" @@ -50,16 +57,19 @@ type: mov esil: 6415911,rax,= stack: null family: cpu + [0x00404800]> ao~ptr[1] 0x0061e627 0 + [0x00404800]> px 10 @ `ao~ptr[1]` - offset - 0 1 2 3 4 5 6 7 8 9 A B C D E F 0123456789ABCDEF 0x0061e627 7461 6200 2e69 6e74 6572 tab..inter -[0x00404800]> ``` -And of course it's possible to redirect the output of an rizin command into a file, using the `>` and `>>` + +And of course it's possible to redirect the output of a rizin command into a file, using the `>` and `>>` commands + ``` [0x00404800]> px 10 @ `ao~ptr[1]` > example.txt [0x00404800]> px 10 @ `ao~ptr[1]` >> example.txt @@ -67,5 +77,3 @@ commands The `%$?` command describes several helpful variables you can use to do similar actions even more easily, like the `$v` "immediate value" variable, or the `$m` opcode memory reference variable. - - diff --git a/src/scripting/loops.md b/src/scripting/loops.md index 0dcf1ca3..355ffca1 100644 --- a/src/scripting/loops.md +++ b/src/scripting/loops.md @@ -5,11 +5,13 @@ there are multiple ways to do this in rizin. You can find all these loops under `@@?`. We can loop over flags: + ``` @@f:flagname-regex ``` For example, we want to see function information with `afi` command: + ``` [0x004047d6]> afi # @@ -34,19 +36,25 @@ args: 0 diff: type: new [0x004047d6]> ``` -Now let's say, for example, that we'd like see a particular field from this output for all functions found by analysis. We can do that with a loop over all function flags (whose names begin with `fcn.`): +Now let's say, for example, that we'd like to see a particular field from this output for all functions found by analysis. 
+We can do that with a loop over all function flags (whose names begin with `fcn.`): + ``` [0x004047d6]> fs functions [0x004047d6]> afi @@f:fcn.* ~name ``` + This command will extract the `name` field from the `afi` output of every flag with a name matching the regexp `fcn.*`. -There are also other loops, for example one called `@@F` runs your command on every functions found by rizin: + +There are other loops, for example one called `@@F` runs your command on every function found by rizin: + ``` [0x004047d6]> afi @@F ~name ``` We can also loop over a list of offsets, using the following syntax: + ``` @@=1 2 3 ... N ``` @@ -77,13 +85,14 @@ fail: 0x004047da stack: null cond: al family: cpu -[0x004047d6]> ``` + Note we're using the `$$` variable which evaluates to the current offset. Also note that `$$+2` is evaluated before looping, so we can use the simple arithmetic expressions. A third way to loop is by having the offsets be loaded from a file. This file should contain one offset per line. + ``` [0x004047d0]> ?v $$ > offsets.txt [0x004047d0]> ?v $$+2 >> offsets.txt @@ -96,6 +105,7 @@ mov r9, rdx ``` If you want to iterate over all instructions of a basic block, you can do: + ``` [0x004047d0]> pi 1 @@i endbr64 @@ -103,6 +113,7 @@ push rbx test rdi, rdi je 0x14635 ``` + In this example the command `pi 1` runs over all the instructions in the current basic block. If you want to iterate over all instructions of all basic blocks of the current function, you can do: @@ -127,6 +138,7 @@ call sym.imp.abort ``` Or if you want to make the split between basic blocks clearer: + ``` [0x004047d0]> (_;pi 1 @@i; ?e)() @@b endbr64 @@ -147,5 +159,4 @@ lea rdi, str.A_NULL_argv_0__was_passed_through_an_exec_system_call. 
mov rcx, qword [rax] call sym.imp.fwrite call sym.imp.abort - -``` \ No newline at end of file +``` diff --git a/src/scripting/macros.md b/src/scripting/macros.md index 15de11fe..c83887bd 100644 --- a/src/scripting/macros.md +++ b/src/scripting/macros.md @@ -1,7 +1,7 @@ # Macros -Apart from simple sequencing and looping, rizin allows to write -simple macros, using this construction: +Apart from simple sequencing and looping, Rizin allows to write simple macros, using this construction: + ``` [0x00404800]> (qwe; pd 4; ao) ``` @@ -32,12 +32,14 @@ family: cpu ``` To list available macros simply call `(*`: + ``` [0x00404800]> (* (qwe ; pd 4; ao) ``` -And if want to remove some macro, just add '-' before the name: +And if you want to remove some macro, just add '-' before the name: + ``` [0x00404800]> (-qwe) Macro 'qwe' removed. @@ -59,9 +61,11 @@ simple scripting situations. To create a macro that takes arguments you simply a 0x004047d9 and rsp, 0xfffffffffffffff0 [0x004047d6]> ``` + As you can see, the arguments are named by index, starting from 0: $0, $1, ... To run a macro multiple times with different arguments, a convenient way is to use `..(`: + ``` [0x004047d6]> s entry0 [0x004047d0]> ..(foo 3 5 2 4) @@ -76,7 +80,8 @@ To run a macro multiple times with different arguments, a convenient way is to u # Aliases -rizin also offers aliases which might help you save time by quickly executing your most used commands. They are under `$?` +Rizin also offers aliases which might help you save time by quickly executing your most used commands. +They are under `$?`. The general usage of the feature is: `$alias=cmd` @@ -84,7 +89,8 @@ The general usage of the feature is: `$alias=cmd` [0x00404800]> $disas=pdf ``` -The above command will create an alias `disas` for `pdf`. The following command prints the disassembly of the main function. +The above command will create an alias `disas` for `pdf`. The following command prints the disassembly +of the main function. 
``` [0x00404800]> $disas @ main @@ -109,12 +115,14 @@ $pmore ``` A single `$` in the above will list all defined aliases. It's also possible check the aliased command of an alias: + ``` [0x00404800]> $pmore? b 200; px ``` -Can we create an alias contains alias ? The answer is yes: +Can we create an alias that contains another alias? The answer is yes: + ``` [0x00404800]> $pStart='s 0x0;$pmore' [0x00404800]> $pStart diff --git a/src/scripting/rz-pipe.md b/src/scripting/rz-pipe.md index de9ef0cf..cf3480c1 100644 --- a/src/scripting/rz-pipe.md +++ b/src/scripting/rz-pipe.md @@ -1,26 +1,23 @@ # Rz-pipe -The rz-pipe module permits interacting with rizin instances in different methods: +The rz-pipe module permits interacting with Rizin instances in different methods: * spawn pipes (rizin -0) * http queries (cloud friendly) * tcp socket (rizin -c) -``` - pipe spawn async http tcp rap json -python x x - x x x x -haskell x x - x - - x -ocaml x x - x - - x -rust x x x x - - x -``` +| Language | pipe | spawn | async | http | tcp | rap | json | +|----------|------|-------|-------|------|-----|-----|------| +| Python | x | x | - | x | x | x | x | +| Haskell | x | x | - | x | - | - | x | +| OCaml | x | x | - | x | - | - | x | +| Rust | x | x | x | x | - | - | x | -Examples -======== +## Examples -Python ------- +### Python -``` +```shell $ pip install rzpipe ``` @@ -33,8 +30,8 @@ print(rz.cmd("afl")) print(rz.cmdj("aflj")) # evaluates JSONs and returns an object ``` -Haskell -------- +### Haskell + ```haskell import RzPipe import qualified Data.ByteString.Lazy as L @@ -50,34 +47,34 @@ main = do open "http://127.0.0.1:9090" >>= showMainFunction ``` -OCaml ------ +### OCaml + ```ocaml let result = Rz.with_command ~cmd:"/j chown" "/bin/ls" Printf.printf "Rizin output is: %s" result ``` -Rust ------ +### Rust + ```rust - #[macro_use] - extern crate rzpipe; - extern crate serde_json; - use rzpipe::RzPipe; - fn main() { - let path = Some("/bin/ls".to_owned()); - let mut rzp = 
open_pipe!(path).unwrap(); - println!("{}", rzp.cmd("?e Hello World").unwrap()); - if let Ok(json) = rzp.cmdj("ij") { - println!("{}", serde_json::to_string_pretty(&json).unwrap()); - println!("ARCH {}", json["bin"]["arch"]); - } - rzp.close(); - } +#[macro_use] +extern crate rzpipe; +extern crate serde_json; +use rzpipe::RzPipe; +fn main() { + let path = Some("/bin/ls".to_owned()); + let mut rzp = open_pipe!(path).unwrap(); + println!("{}", rzp.cmd("?e Hello World").unwrap()); + if let Ok(json) = rzp.cmdj("ij") { + println!("{}", serde_json::to_string_pretty(&json).unwrap()); + println!("ARCH {}", json["bin"]["arch"]); + } + rzp.close(); +} ``` -Ruby ------ +### Ruby + ```ruby require './rzpipe' diff --git a/src/search_bytes/automation.md b/src/search_bytes/automation.md index 2c96bda7..84578239 100644 --- a/src/search_bytes/automation.md +++ b/src/search_bytes/automation.md @@ -1,7 +1,10 @@ ## Search Automation -The `cmd.hit` configuration variable is used to define a rizin command to be executed when a matching entry is found by the search engine. If you want to run several commands, separate them with `;`. Alternatively, you can arrange them in a separate script, and then invoke it as a whole with `. script-file-name` command. +The `cmd.hit` configuration variable is used to define a Rizin command to be executed when a matching entry +is found by the search engine. If you want to run several commands, separate them with `;`. Alternatively, you can +arrange them in a separate script, and then invoke it as a whole with `. script-file-name` command. For example: + ``` [0x00404888]> e cmd.hit="p8 8" [0x00404888]> / lib diff --git a/src/search_bytes/backward_search.md b/src/search_bytes/backward_search.md index 05467e6f..f15cdce4 100644 --- a/src/search_bytes/backward_search.md +++ b/src/search_bytes/backward_search.md @@ -1,6 +1,7 @@ ## Searching Backwards -Sometimes you want to find a keyword backwards. 
This is, before the current offset, to do this you can seek back and search forward by adding some search.from/to restrictions, or use the `/b` command. +Sometimes you want to find a keyword backwards, that is, before the current offset. To do this, you can seek back +and search forward by adding some search.from/to restrictions, or use the `/b` command. ``` [0x100001200]> / nop @@ -13,7 +14,8 @@ Sometimes you want to find a keyword backwards. This is, before the current offs [0x100004f50]> ``` -Note that `/b` is doing the same as `/`, but backward, so what if we want to use `/x` backward? We can use `/bx`, and the same goes for other search subcommands: +Note that `/b` is doing the same as `/`, but backward, so what if we want to use `/x` backward? We can use `/bx`, +and the same goes for other search subcommands: ``` [0x100001200]> /x 90 @@ -41,4 +43,4 @@ Note that `/b` is doing the same as `/`, but backward, so what if we want to use 0x10000248f hit3_5 90 0x100001a23 hit3_6 90 [0x10000355b]> -``` \ No newline at end of file +``` diff --git a/src/search_bytes/basic_searches.md b/src/search_bytes/basic_searches.md index eca6c14b..d85bf693 100644 --- a/src/search_bytes/basic_searches.md +++ b/src/search_bytes/basic_searches.md @@ -2,70 +2,92 @@ A basic search for a plain text string in a file would be something like: - $ rizin -q -c "/ lib" /bin/ls - Searching 3 bytes from 0x00400000 to 0x0041ae08: 6c 69 62 - hits: 9 - 0x00400239 hit0_0 "lib64/ld-linux-x86-64.so.2" - 0x00400f19 hit0_1 "libselinux.so.1" - 0x00400fae hit0_2 "librt.so.1" - 0x00400fc7 hit0_3 "libacl.so.1" - 0x00401004 hit0_4 "libc.so.6" - 0x004013ce hit0_5 "libc_start_main" - 0x00416542 hit0_6 "libs/" - 0x00417160 hit0_7 "lib/xstrtol.c" - 0x00417578 hit0_8 "lib" - -As can be seen from the output above, rizin generates a "hit" flag for every entry found. 
You can then use the `ps` command to see the strings stored at the offsets marked by the flags in this group, and they will have names of the form `hit0_`: - - [0x00404888]> / ls - ... - [0x00404888]> ps @ hit0_0 - lseek - -You can search for wide-char strings (e.g., unicode letters) using the `/w` command: - - [0x00000000]> /w Hello - 0 results found. - -Using Rizin, you can also automatically search for magic signatures in the file. This can be done using `/m`. The offset and the file format will be displayed after the search. - - [0x00000000]> /m - -- 0 b1606 - 0x00000000 1 JPEG image , EXIF standard - 0x0000000c 1 TIFF image data, big-endian - 0x000b1510 1 7-zip archive data, version 0.3 +```shell +$ rizin -q -c "/ lib" /bin/ls +Searching 3 bytes from 0x00400000 to 0x0041ae08: 6c 69 62 +hits: 9 +0x00400239 hit0_0 "lib64/ld-linux-x86-64.so.2" +0x00400f19 hit0_1 "libselinux.so.1" +0x00400fae hit0_2 "librt.so.1" +0x00400fc7 hit0_3 "libacl.so.1" +0x00401004 hit0_4 "libc.so.6" +0x004013ce hit0_5 "libc_start_main" +0x00416542 hit0_6 "libs/" +0x00417160 hit0_7 "lib/xstrtol.c" +0x00417578 hit0_8 "lib" +``` + +As can be seen from the output above, Rizin generates a "hit" flag for every entry found. You can then use the `ps` +command to see the strings stored at the offsets marked by the flags in this group, and they will have names +of the form `hit0_`: + +``` +[0x00404888]> / ls +... +[0x00404888]> ps @ hit0_0 +lseek +``` + +You can search for wide-char strings (e.g., Unicode letters) using the `/w` command: + +``` +[0x00000000]> /w Hello +0 results found. +``` + +Using Rizin, you can also automatically search for magic signatures in the file. This can be done using `/m`. +The offset and the file format will be displayed after the search. 
+ +``` +[0x00000000]> /m +-- 0 b1606 +0x00000000 1 JPEG image , EXIF standard +0x0000000c 1 TIFF image data, big-endian +0x000b1510 1 7-zip archive data, version 0.3 +``` To perform a case-insensitive search for strings use `/i`: - [0x0040488f]> /i Stallman - Searching 8 bytes from 0x00400238 to 0x0040488f: 53 74 61 6c 6c 6d 61 6e - [# ]hits: 004138 < 0x0040488f hits = 0 +``` +[0x0040488f]> /i Stallman +Searching 8 bytes from 0x00400238 to 0x0040488f: 53 74 61 6c 6c 6d 61 6e +[# ]hits: 004138 < 0x0040488f hits = 0 +``` It is possible to specify hexadecimal escape sequences in the search string by prepending them with `\x`: - [0x00000000]> / \x7FELF +``` +[0x00000000]> / \x7FELF +``` -if, instead, you are searching for a string of hexadecimal values, you're probably better of using the `/x` command: +If, instead, you are searching for a string of hexadecimal values, you're probably better off using the `/x` command: - [0x00000000]> /x 7F454C46 +``` +[0x00000000]> /x 7F454C46 +``` Once the search is done, the results are stored in the `searches` flag space. - [0x00000000]> fs - 0 0 . strings - 1 0 . symbols - 2 6 . searches - - [0x00000000]> f - 0x00000135 512 hit0_0 - 0x00000b71 512 hit0_1 - 0x00000bad 512 hit0_2 - 0x00000bdd 512 hit0_3 - 0x00000bfb 512 hit0_4 - 0x00000f2a 512 hit0_5 +``` +[0x00000000]> fs +0 0 . strings +1 0 . symbols +2 6 . searches + +[0x00000000]> f +0x00000135 512 hit0_0 +0x00000b71 512 hit0_1 +0x00000bad 512 hit0_2 +0x00000bdd 512 hit0_3 +0x00000bfb 512 hit0_4 +0x00000f2a 512 hit0_5 +``` To remove "hit" flags after you do not need them anymore, use the `f- hit*` command. -Often, during long search sessions, you will need to launch the latest search more than once. You can use the `//` command to repeat the last search. +Often, during long search sessions, you will need to launch the latest search more than once. You can use the `//` +command to repeat the last search. 
- [0x00000f2a]> // ; repeat last search +``` +[0x00000f2a]> // ; repeat last search +``` diff --git a/src/search_bytes/configurating_the_search.md b/src/search_bytes/configurating_the_search.md index c6871375..f60d8d32 100644 --- a/src/search_bytes/configurating_the_search.md +++ b/src/search_bytes/configurating_the_search.md @@ -1,16 +1,29 @@ ## Configuring Search Options -The rizin search engine can be configured through several configuration variables, modifiable with the `e` command. +The Rizin search engine can be configured through several configuration variables, modifiable with the `e` command. + ``` -e cmd.hit=x ; rizin command to execute on every search hit -e search.distance=0 ; search string distance -e search.in=foo ; specify search boundarie. Supported values are listed under e search.in=?? -e search.align=4 ; only show search results aligned by specified boundary. -e search.from=0 ; start address -e search.to=0 ; end address -e search.asmstr=0 ; search for string instead of assembly -e search.flags=true ; if enabled, create flags on hits +[0x00000000]> ell cmd.hit +cmd.hit = ; Run when a search hit is found + +[0x00000000]> ell search +search.align = 0 ; Only catch aligned search hits +search.case_sensitive = smart ; Set grep(~) as case smart/sensitive/insensitive [smart, sensitive, insensitive] +search.chunk = 0 ; Chunk size for /+ (default size is asm.bits/8 +search.contiguous = true ; Accept contiguous/adjacent search hits +search.distance = 0 ; Search string distance +search.esilcombo = 8 ; Stop search after N consecutive hits +search.flags = true ; All search results are flagged, otherwise only printed +search.from = 0xffffffffffffffff ; Search start address +search.in = io.maps ; Specify search boundaries [raw, block, bin.section, bin.sections, bin.sections.rwx, bin.sections.r, bin.sections.rw, bin.sections.rx, bin.sections.wx, bin.sections.x, io.map, io.maps, io.maps.rwx, io.maps.r, io.maps.rw, io.maps.rx, io.maps.wx, io.maps.x, dbg.stack, 
dbg.heap, dbg.map, dbg.maps, dbg.maps.rwx, dbg.maps.r, dbg.maps.rw, dbg.maps.rx, dbg.maps.wx, dbg.maps.x, analysis.fcn, analysis.bb] +search.kwidx = 1 ; Store last search index count +search.maxhits = 0 ; Maximum number of hits (0: no limit) +search.overlap = false ; Look for overlapped search hits +search.prefix = hit ; Prefix name in search hits label +search.show = true ; Show search results +search.to = 0xffffffffffffffff ; Search end address ``` -The `search.align` variable is used to limit valid search hits to certain alignment. For example, with `e search.align=4` you will see only hits found at 4-bytes aligned offsets. -The `search.flags` boolean variable instructs the search engine to flag hits so that they can be referenced later. If a currently running search is interrupted with `Ctrl-C` keyboard sequence, current search position is flagged with `search_stop`. +The `search.flags` boolean variable instructs the search engine to flag hits so that they can be referenced later. +If a currently running search is interrupted with the `Ctrl-C` keyboard sequence, the current search position is flagged +with `search_stop`. diff --git a/src/search_bytes/intro.md b/src/search_bytes/intro.md index 63878561..2217ef24 100644 --- a/src/search_bytes/intro.md +++ b/src/search_bytes/intro.md @@ -1,52 +1,51 @@ # Searching for Bytes -The rizin search engine is based on work done by esteve, plus multiple features implemented on top of it. It supports multiple keyword searches, binary masks, and hexadecimal values. It automatically creates flags for search hit locations ease future referencing. +The Rizin search engine is based on work done by esteve, plus multiple features implemented on top of it. It supports +multiple keyword searches, binary masks, and hexadecimal values. It automatically creates flags for search hit +locations to ease future referencing. Search is initiated by `/` command. ``` [0x00000000]> /? 
-|Usage: /[!bf] [arg]Search stuff (see 'el search' for options) +Usage: /[!bf] [arg] Search stuff (see 'e??search' for options) |Use io.va for searching in non virtual addressing spaces -| / foo\x00 search for string 'foo\0' -| /j foo\x00 search for string 'foo\0' (json output) -| /! ff search for first occurrence not matching, command modifier -| /!x 00 inverse hexa search (find first byte != 0x00) -| /+ /bin/sh construct the string with chunks -| // repeat last search -| /a jmp eax assemble opcode and search its bytes -| /A jmp find analyzed instructions of this type (/A? for help) -| /b search backwards, command modifier, followed by other command -| /B search recognized RzBin headers -| /c jmp [esp] search for asm code matching the given string -| /ce rsp,rbp search for esil expressions matching -| /C[ar] search for crypto materials -| /d 101112 search for a deltified sequence of bytes -| /e /E.F/i match regular expression -| /E esil-expr offset matching given esil expressions %%= here -| /f search forwards, command modifier, followed by other command -| /F file [off] [sz] search contents of file with offset and size -| /g[g] [from] find all graph paths A to B (/gg follow jumps, see search.count and -analysis.depth) -| /h[t] [hash] [len] find block matching this hash. See ph -| /i foo search for string 'foo' ignoring case -| /m magicfile search for matching magic file (use blocksize) -| /o [n] show offset of n instructions backward -| /O [n] same as /o, but with a different fallback if analysis cannot be used -| /p patternsize search for pattern of given size -| /P patternsize search similar blocks -| /r[erwx][?] 
sym.printf analyze opcode reference an offset (/re for esil) -| /R [grepopcode] search for matching ROP gadgets, semicolon-separated -| /s search for all syscalls in a region (EXPERIMENTAL) -| /v[1248] value look for an `cfg.bigendian` 32bit value -| /V[1248] min max look for an `cfg.bigendian` 32bit value in range -| /w foo search for wide string 'f\0o\0o\0' -| /wi foo search for wide string ignoring case 'f\0o\0o\0' -| /x ff..33 search for hex string ignoring some nibbles -| /x ff0033 search for hex string -| /x ff43:ffd0 search for hexpair with mask -| /z min max search for strings of given size +| / foo\x00 search for string 'foo\0' +| /j foo\x00 search for string 'foo\0' (json output) +| /! ff search for first occurrence not matching, command modifier +| /!x 00 inverse hexa search (find first byte != 0x00) +| /+ /bin/sh construct the string with chunks +| // repeat last search +| /a[?][1aoditfmsltf] jmp eax assemble opcode and search its bytes +| /b search backwards, command modifier, followed by other command +| /c[?][adr] search for crypto materials +| /d 101112 search for a deltified sequence of bytes +| /e /E.F/i match regular expression +| /E esil-expr offset matching given esil expressions $$ = here +| /f search forwards, (command modifier) +| /F file [off] [sz] search contents of file with offset and size +| /g[g] [from] find all graph paths A to B (/gg follow jumps, see search.count and analysis.depth) +| /h[t] [hash] [len] find block matching this hash. See ph +| /i foo search for string 'foo' ignoring case +| /m[?][ebm] magicfile search for magic, filesystems or binary headers +| /o [n] show offset of n instructions backward +| /O [n] same as /o, but with a different fallback if analysis cannot be used +| /p patternsize search for pattern of given size +| /P patternsize search similar blocks +| /s[*] [threshold] find sections by grouping blocks with similar entropy +| /r[rwx][?] 
sym.printf analyze opcode reference an offset +| /R [grepopcode] search for matching ROP gadgets, semicolon-separated +| /v[1248] value look for an `cfg.bigendian` 32bit value +| /V[1248] min max look for an `cfg.bigendian` 32bit value in range +| /w foo search for wide string 'f\0o\0o\0' +| /wi foo search for wide string ignoring case 'f\0o\0o\0' +| /x ff..33 search for hex string ignoring some nibbles +| /x ff0033 search for hex string +| /x ff43:ffd0 search for hexpair with mask +| /z min max search for strings of given size +| /* [comment string] add multiline comment, end it with '*/' ``` -Because everything is treated as a file in rizin, it does not matter whether you search in a socket, a remote device, in process memory, or a file. +Because everything is treated as a file in Rizin, it does not matter whether you search in a socket, a remote device, +in process memory, or a file. -note that '/*' starts multiline comment. It's not for searching. type '*/' to end comment. +Note that `/*` starts multiline comment. It's not for searching. Type `*/` to end comment. diff --git a/src/search_bytes/pattern_search.md b/src/search_bytes/pattern_search.md index 9d7c0dd7..80bf6ea2 100644 --- a/src/search_bytes/pattern_search.md +++ b/src/search_bytes/pattern_search.md @@ -1,8 +1,12 @@ ## Pattern Matching Search -The `/p` command allows you to apply repeated pattern searches on IO backend storage. It is possible to identify repeated byte sequences without explicitly specifying them. The only command's parameter sets minimum detectable pattern length. +The `/p` command allows you to apply repeated pattern searches on IO backend storage. It is possible to identify +repeated byte sequences without explicitly specifying them. The only command's parameter sets minimum detectable +pattern length. Here is an example: + ``` [0x00000000]> /p 10 ``` + This command output will show different patterns found and how many times each of them is encountered. 
diff --git a/src/search_bytes/search_in_assembly.md b/src/search_bytes/search_in_assembly.md index 91224006..335a112d 100644 --- a/src/search_bytes/search_in_assembly.md +++ b/src/search_bytes/search_in_assembly.md @@ -3,6 +3,7 @@ If you want to search for a certain assembler opcodes, you can use `/a` commands. The command `/ad/ jmp [esp]` searches for the specified category of assembly mnemonic: + ``` [0x00404888]> /ad/ jmp qword [rdx] f hit_0 @ 0x0040e50d # 2: jmp qword [rdx] @@ -15,6 +16,7 @@ f hit_6 @ 0x00419c43 # 3: jmp qword [rdx] ``` The command `/a jmp eax` assembles a string to machine code, and then searches for the resulting bytes: + ``` [0x00404888]> /a jmp eax hits: 1 diff --git a/src/search_bytes/searching_aes_keys.md b/src/search_bytes/searching_aes_keys.md index 5a68c8a7..8266a72d 100644 --- a/src/search_bytes/searching_aes_keys.md +++ b/src/search_bytes/searching_aes_keys.md @@ -1,6 +1,9 @@ ## Searching for AES Keys -Thanks to Victor Muñoz, rizin now has support of the algorithm he developed, capable of finding **expanded AES** keys with `/Ca` command. It searches from current seek position up to the `search.distance` limit, or until end of file is reached. You can interrupt current search by pressing `Ctrl-C`. For example, to look for AES keys in physical memory of your system: +Thanks to Victor Muñoz, Rizin now has support of the algorithm he developed, capable of finding **expanded AES** keys +with `/Ca` command. It searches from current seek position up to the `search.distance` limit, or until end of file +is reached. You can interrupt current search by pressing `Ctrl-C`. 
For example, to look for AES keys in physical memory +of your system: ``` $ sudo rizin /dev/mem @@ -8,7 +11,8 @@ $ sudo rizin /dev/mem 0 AES keys found ``` -If you are simply looking for plaintext AES keys in your binary, `/Ca` will not find them, but you might want to search with `is~AES` instead if the programmer left those hints for you: +If you are simply looking for plaintext AES keys in your binary, `/Ca` will not find them, but you might want to search +with `is~AES` instead if the programmer left those hints for you: ``` [0x00000000]> /Ca @@ -24,7 +28,11 @@ hits: 0 010 0x000096d4 0x000196d4 GLOBAL OBJ 16 AES_KEY ``` -Other than that, AES keys might show up in different ways in the binary: encrypted, hidden by another encrypting routine, so there's no absolute way other than understanding the binary being analyzed. For instance, `p=e` might give some hints if high(er) entropy sections are found trying to cover up a hardcoded secret. As an example on entropy searching, since rizin 3.2.0, there's the possibility to delimit entropy sections for later use like so: +Other than that, AES keys might show up in different ways in the binary: encrypted, hidden by another +encrypting routine, so there's no absolute way other than understanding the binary being analyzed. For instance, +`p=e` might give some hints if high(er) entropy sections are found trying to cover up a hardcoded secret. +As an example on entropy searching, since rizin 3.2.0, there's the possibility to delimit entropy sections +for later use like so: ``` [0x00000000]> b @@ -49,6 +57,9 @@ f entropy_section_5 0x00002000 0x0144d374 f entropy_section_6 0x00002000 0x0144f374 ``` -The blocksize is increased to 4096 bytes from the default 100 bytes so that the entropy search `/s` can work on reasonably sized chunks for entropy analysis. The sections flags can be applied with the dot operator, `./s*` and then looped through `px 32 @@f:entropy*`. 
+The blocksize is increased to 4096 bytes from the default 100 bytes so that the entropy search `/s` can work +on reasonably sized chunks for entropy analysis. The sections flags can be applied with the dot operator, +`./s*` and then looped through `px 32 @@f:entropy*`. -Moreover AES keys might be referenced from strings or pointed from the imports, for instance, so the `/` and other search-related commands can come in handy in this regard. +Moreover, AES keys might be referenced from strings or pointed from the imports, for instance, +so the `/` and other search-related commands can come in handy in this regard. diff --git a/src/tools/intro.md b/src/tools/intro.md index 08fa1a9a..2cd798b4 100644 --- a/src/tools/intro.md +++ b/src/tools/intro.md @@ -1,10 +1,14 @@ # Tools -Rizin is not just the only tool provided by the rizin project. The rest if chapters in this book are focused on explaining the use of the rizin tool, this chapter will focus on explaining all the other companion tools that are shipped inside the rizin project. +Rizin is not the only tool provided by the Rizin project. The rest of the chapters in this book are focused +on explaining the use of the rizin tool; this chapter will focus on explaining all the other companion tools +that are shipped inside the Rizin project. -All the functionalities provided by the different APIs and plugins have also different tools to allow to use them from the commandline and integrate them with shellscripts easily. +All the functionalities provided by the different APIs and plugins have also different tools to allow to use them +from the commandline and integrate them with shellscripts easily. 
-Thanks to the orthogonal design of the framework it is possible to do all the things that rizin is able from different places: +Thanks to the orthogonal design of the framework it is possible to do all the things that Rizin is able to do +from different places: * These companion tools * Native library apis diff --git a/src/tools/rz-asm/assemble.md b/src/tools/rz-asm/assemble.md index 952cda7c..0ff78c9b 100644 --- a/src/tools/rz-asm/assemble.md +++ b/src/tools/rz-asm/assemble.md @@ -1,10 +1,13 @@ ## Assembler -Assembling is the action to take a computer instruction in human readable form (using mnemonics) and convert that into a bunch of bytes that can be executed by a machine. +Assembling is the action of taking a computer instruction in human-readable form (using mnemonics) and converting that +into a bunch of bytes that can be executed by a machine. -In rizin, the assembler and disassembler logic is implemented in the rz_asm_* API, and can be used with the pa and pad commands from the commandline as well as using `rz-asm`. +In Rizin, the assembler and disassembler logic is implemented in the rz_asm_* API, and can be used with +the pa and pad commands from the commandline as well as using `rz-asm`. -Rz-asm can be used to quickly copy-paste hexpairs that represent a given machine instruction. The following line is assembling this mov instruction for x86/32. +Rz-asm can be used to quickly copy-paste hexpairs that represent a given machine instruction. +The following line is assembling this mov instruction for x86/32. ``` $ rz-asm -a x86 -b 32 'mov eax, 33' @@ -18,15 +21,21 @@ $ echo 'push eax;nop;nop' | rz-asm -f - 5090 ``` -As you have seen, rz-asm can assemble one or many instructions. In line by separating them with a semicolon `;`, but can also read that from a file, using generic nasm/gas/.. syntax and directives. You can check the rz-asm manpage for more details on this. +As you have seen, rz-asm can assemble one or many instructions. 
In line by separating them with a semicolon `;`, +but can also read that from a file, using generic nasm/gas/.. syntax and directives. You can check the rz-asm manpage +for more details on this. -The `pa` and `pad` are a subcommands of print, what means they will only print assembly or disassembly. In case you want to actually write the instruction it is required to use `wa` or `wx` commands with the assembly string or bytes appended. +The `pa` and `pad` commands are subcommands of print, which means they will only print assembly or disassembly. +In case you want to actually write the instruction it is required to use `wa` or `wx` commands with +the assembly string or bytes appended. -The assembler understands the following input languages and their flavors: `x86` (Intel and AT&T variants), `olly` (OllyDBG syntax), `powerpc` (PowerPC), `arm` and `java`. For Intel syntax, rz-asm tries to mimic NASM or GAS. +The assembler understands the following input languages and their flavors: `x86` (Intel and AT&T variants), +`olly` (OllyDBG syntax), `powerpc` (PowerPC), `arm` and `java`. For Intel syntax, rz-asm tries to mimic NASM or GAS. -There are several examples in the rz-asm source code directory. Consult them to understand how you can assemble a raw binary file from a rz-asm description. +There are several examples in the rz-asm source code directory. Consult them to understand how you can assemble +a raw binary file from a rz-asm description. -Lets create an assembly file called `selfstop.rzasm`: +Let's create an assembly file called `selfstop.rzasm`: ```asm ; @@ -81,9 +90,11 @@ Now we can assemble it in place: ### Visual mode -Assembling also is accessible in rizin visual mode through pressing `A` key to insert the assembly in the current offset. +Assembling is also accessible in Rizin visual mode by pressing the `A` key to insert the assembly in +the current offset. 
-The cool thing of writing assembly using the visual assembler interface that the changes are done in memory until you press enter. +The cool thing about writing assembly using the visual assembler interface is that the changes are done in memory until +you press enter. So you can check the size of the code and which instructions is overlapping before committing the changes. diff --git a/src/tools/rz-asm/disassemble.md b/src/tools/rz-asm/disassemble.md index 6ef853e6..a8ada568 100644 --- a/src/tools/rz-asm/disassemble.md +++ b/src/tools/rz-asm/disassemble.md @@ -1,6 +1,7 @@ ## Disassembler -Disassembling is the inverse action of assembling. Rz-asm takes hexpair as an input (but can also take a file in binary form) and show the human readable form. +Disassembling is the inverse action of assembling. Rz-asm takes hexpair as an input (but can also take a file +in binary form) and shows the human-readable form. To do this we can use the `-d` option of rz-asm like this: @@ -11,9 +12,10 @@ nop Rz-asm also has the `-D` flag to show the disassembly like `-d` does, but includes offset and bytes. -In rizin there are many commands to perform a disassembly from a specific place in memory. +In Rizin there are many commands to perform a disassembly from a specific place in memory. -You might be interested in trying if you want different outputs for later parsing with your scripts, or just grep to find what you are looking for: +You might be interested in trying if you want different outputs for later parsing with your scripts, or just grep +to find what you are looking for: ### pd N diff --git a/src/tools/rz-asm/intro.md b/src/tools/rz-asm/intro.md index e0f1aec1..8fb207bb 100644 --- a/src/tools/rz-asm/intro.md +++ b/src/tools/rz-asm/intro.md @@ -1,112 +1,126 @@ # Rz-asm -`rz-asm` is an inline assembler/disassembler. Its main function is to get bytes corresponding to given machine instruction opcode. +`rz-asm` is an inline assembler/disassembler. 
Its main function is to get bytes corresponding to given machine +instruction opcode. ``` $ rz-asm -h Usage: rz-asm [-ACdDehLBvw] [-a arch] [-b bits] [-o addr] [-s syntax] [-f file] [-F fil:ter] [-i skip] [-l len] 'code'|hex|- - -a [arch] Set architecture to assemble/disassemble (see -L) - -A Show Analysis information from given hexpairs - -b [bits] Set cpu register size (8, 16, 32, 64) (RASM2_BITS) - -B Binary input/output (-l is mandatory for binary input) - -c [cpu] Select specific CPU (depends on arch) - -C Output in C format - -d, -D Disassemble from hexpair bytes (-D show hexpairs) - -e Use big endian instead of little endian - -E Display ESIL expression (same input as in -d) - -f [file] Read data from file - -F [in:out] Specify input and/or output filters (att2intel, x86.pseudo, ...) - -h, -hh Show this help, -hh for long - -i [len] ignore/skip N bytes of the input buffer - -j output in json format - -k [kernel] Select operating system (linux, windows, darwin, ..) - -l [len] Input/Output length - -L List Asm plugins: (a=asm, d=disasm, A=analyze, e=ESIL) - -o [offset] Set start address for code (default 0) - -O [file] Output file name (rz-asm -Bf a.asm -O a) - -p Run SPP over input for assembly - -q quiet mode - -r output in rizin commands - -s [syntax] Select syntax (intel, att) - -v Show version information - -w What's this instruction for? 
describe opcode + -a [arch] Set architecture to assemble/disassemble (see -L) + -A Show Analysis information from given hexpairs + -b [bits] Set cpu register size (8, 16, 32, 64) (RZ_ASM_BITS) + -B Binary input/output (-l is mandatory for binary input) + -c [cpu] Select specific CPU (depends on arch) + -C Output in C format + -d, -D Disassemble from hexpair bytes (-D show hexpairs) + -e Use big endian instead of little endian + -I Display lifted RzIL code (same input as in -d, IL is also validated) + -E Display ESIL expression (same input as in -d) + -f [file] Read data from file + -F [in:out] Specify input and/or output filters (att2intel, x86.pseudo, ...) + -h, -hh Show this help, -hh for long + -i [len] Ignore N bytes of the input buffer + -j Output in JSON format + -k [kernel] Select operating system (linux, windows, darwin, ..) + -l [len] Input/Output length + -L List Asm plugins: (a=asm, d=disasm, A=analyze, e=ESIL) + -o, -@ [addr] Set start address for code (default 0) + -O [file] Output file name (rz-asm -Bf a.asm -O a) + -p Run SPP over input for assembly + -q Quiet mode + -r Output in rizin commands + -s [syntax] Select syntax (intel, att) + -v Show version information + -x Use hex dwords instead of hex pairs when assembling. + -w Describe opcode If '-l' value is greater than output length, output is padded with nops If the last argument is '-' reads from stdin Environment: - RASM2_NOPLUGINS do not load shared plugins (speedup loading) - RASM2_ARCH same as rz-asm -a - RASM2_BITS same as rz-asm -b - R_DEBUG if defined, show error messages and crash signal - + RZ_NOPLUGINS do not load shared plugins (speedup loading) + RZ_ASM_ARCH same as rz-asm -a + RZ_ASM_BITS same as rz-asm -b + RZ_DEBUG if defined, show error messages and crash signal ``` -Plugins for supported target architectures can be listed with the `-L` option. 
Knowing a plugin name, you can use it by specifying its name to the `-a` option +Plugins for supported target architectures can be listed with the `-L` option. Knowing a plugin name, +you can use it by specifying its name to the `-a` option ``` $ rz-asm -L _dAe 8 16 6502 LGPL3 6502/NES/C64/Tamagotchi/T-1000 CPU -_dAe 8 8051 PD 8051 Intel CPU -_dA_ 16 32 arc GPL3 Argonaut RISC Core -a___ 16 32 64 arm.as LGPL3 as ARM Assembler (use ARM_AS environment) +adAe 8 8051 PD 8051 Intel CPU +_dA_ 32 amd29k LGPL3 AMD 29k RISC CPU (by deroad) +a___ 16 32 64 arm.as LGPL3 as ARM Assembler (use RZ_ARM32_AS and RZ_ARM64_AS environment) (by pancake) adAe 16 32 64 arm BSD Capstone ARM disassembler -_dA_ 16 32 64 arm.gnu GPL3 Acorn RISC Machine CPU -_d__ 16 32 arm.winedbg LGPL2 WineDBG's ARM disassembler -adAe 8 16 avr GPL AVR Atmel -adAe 16 32 64 bf LGPL3 Brainfuck (by pancake, nibble) v4.0.0 +adAe 8 16 avr LGPL3 AVR Atmel +adA_ 16 32 64 bf LGPL3 Brainfuck (by pancake, nibble) v4.0.0 _dA_ 32 chip8 LGPL3 Chip8 disassembler +_dA_ 16 32 64 cil LGPL3 .NET Common Intermediate Language _dA_ 16 cr16 LGPL3 cr16 disassembly plugin -_dA_ 32 cris GPL3 Axis Communications 32-bit embedded processor adA_ 32 64 dalvik LGPL3 AndroidVM Dalvik ad__ 16 dcpu16 PD Mojang's DCPU-16 -_dA_ 32 64 ebc LGPL3 EFI Bytecode -adAe 16 gb LGPL3 GameBoy(TM) (z80-like) +_dA_ 32 64 ebc LGPL3 EFI Bytecode (by Fedor Sakharov) +adAe 16 gb LGPL3 GameBoy(TM) (z80-like) (by condret) _dAe 16 h8300 LGPL3 H8/300 disassembly plugin -_dAe 32 hexagon LGPL3 Qualcomm Hexagon (QDSP6) V6 -_d__ 32 hppa GPL3 HP PA-RISC -_dAe i4004 LGPL3 Intel 4004 microprocessor +_dA_ 32 hexagon LGPL3 Qualcomm Hexagon (QDSP6) V6 (by Rot127) +_dA_ 4 i4004 LGPL3 Intel 4004 microprocessor _dA_ 8 i8080 BSD Intel 8080 CPU -adA_ 32 java Apache Java bytecode -_d__ 32 lanai GPL3 LANAI +adA_ 32 java LGPL-3 Java bytecode disassembler (by deroad) _d__ 8 lh5801 LGPL3 SHARP LH5801 disassembler -_d__ 32 lm32 BSD disassembly plugin for Lattice Micro 32 ISA -_dA_ 16 32 
m68k BSD Capstone M68K disassembler -_dA_ 32 malbolge LGPL3 Malbolge Ternary VM +_d__ 32 lm32 BSD disassembly plugin for Lattice Micro 32 ISA (by Felix Held) +adA_ 8 luac LGPL3 luac disassemble plugin +_dA_ 32 m68k BSD Capstone M68K disassembler +_dA_ 8 32 m680x BSD Capstone M680X Disassembler +_dA_ 32 malbolge LGPL3 Malbolge Ternary VM (by condret) +_dA_ 32 mcore LGPL3 Motorola MCORE disassembler _d__ 16 mcs96 LGPL3 condrets car adAe 16 32 64 mips BSD Capstone MIPS disassembler -adAe 32 64 mips.gnu GPL3 MIPS CPU _dA_ 16 msp430 LGPL3 msp430 disassembly plugin -_dA_ 32 nios2 GPL3 NIOS II Embedded Processor +adA_ 16 32 64 null MIT no disassemble (by pancake) v1.0.0 +_dA_ 32 or1k LGPL3 OpenRISC 1000 _dAe 8 pic LGPL3 PIC disassembler -_dAe 32 64 ppc BSD Capstone PowerPC disassembler -_dA_ 32 64 ppc.gnu GPL3 PowerPC -_d__ 32 propeller LGPL3 propeller disassembly plugin -_dA_ 32 64 riscv GPL RISC-V -_dAe 32 rsp LGPL3 Reality Signal Processor -_dAe 32 sh GPL3 SuperH-4 CPU +a___ 32 64 ppc.as LGPL3 as PPC Assembler (use RZ_PPC_AS environment) (by eagleoflqj) +_dAe 32 64 ppc BSD Capstone PowerPC disassembler (by pancake) +_dA_ 32 propeller LGPL3 propeller disassembly plugin +_dA_ 8 16 pyc LGPL3 PYC disassemble plugin +adA_ 32 rl78 LGPL3 Renesas RL78 disassembler (by Bastian Engel) +_dA_ 32 rsp LGPL3 Reality Signal Processor +_dA_ 32 rx LGPL3 Renesas RX Family disassembler (by Heersin) +adAe 32 sh LGPL3 SuperH-4 CPU (by DMaroo) _dA_ 8 16 snes LGPL3 SuperNES CPU -_dAe 32 64 sparc BSD Capstone SPARC disassembler -_dA_ 32 64 sparc.gnu GPL3 Scalable Processor Architecture -_d__ 16 spc700 LGPL3 spc700, snes' sound-chip -_d__ 32 sysz BSD SystemZ CPU disassembler +_dA_ 32 64 sparc BSD Capstone SPARC disassembler +_dA_ 16 spc700 LGPL3 spc700, snes' sound-chip +_dA_ 32 64 sysz BSD SystemZ CPU disassembler _dA_ 32 tms320 LGPLv3 TMS320 DSP family (c54x,c55x,c55x+,c64x) -_d__ 32 tricore GPL3 Siemens TriCore CPU -_dAe 32 v810 LGPL3 v810 disassembly plugin +_d__ 32 tms320c64x BSD Capstone 
TMS320c64x disassembler +_dAe 32 v810 LGPL3 v810 disassembly plugin (by pancake) _dAe 32 v850 LGPL3 v850 disassembly plugin -_dAe 8 32 vax GPL VAX adA_ 32 wasm MIT WebAssembly (by cgvwzq) v0.1.0 -_dA_ 32 ws LGPL3 Whitespace esotheric VM -a___ 16 32 64 x86.as LGPL3 Intel X86 GNU Assembler +a___ 16 32 64 x86.as LGPL3 Intel X86 GNU Assembler (Use RZ_X86_AS env) _dAe 16 32 64 x86 BSD Capstone X86 disassembler a___ 16 32 64 x86.nasm LGPL3 X86 nasm assembler a___ 16 32 64 x86.nz LGPL3 x86 handmade assembler _dA_ 16 xap PD XAP4 RISC (CSR) -_dA_ 32 xcore BSD Capstone XCore disassembler +_dA_ 32 xcore BSD Capstone XCore disassembler (by pancake) +_dAe 32 64 riscv.cs BSD Capstone RISCV disassembler +_dA_ 32 tricore BSD Siemens TriCore CPU (by billow) +_dA_ 16 32 arc GPL3 Argonaut RISC Core +_dA_ 32 cris GPL3 Axis Communications 32-bit embedded processor (by pancake) +_d__ 32 hppa GPL3 HP PA-RISC +_d__ 32 lanai GPL3 LANAI +adAe 32 64 mips.gnu GPL3 MIPS CPU +_dA_ 32 nios2 GPL3 NIOS II Embedded Processor +_dAe 32 64 riscv GPL3 RISC-V +_dA_ 32 64 sparc.gnu GPL3 Scalable Processor Architecture +_dA_ 8 32 vax GPL3 VAX _dAe 32 xtensa GPL3 XTensa CPU -adA_ 8 z80 GPL Zilog Z80 +adA_ 8 z80 GPL3 Zilog Z80 (by condret) +_dAe 8 16 32 64 ghidra LGPL3 SLEIGH Disassembler from Ghidra (by FXTi) ``` -> Note that "ad" in the first column means both assembler and disassembler are offered by a corresponding plugin. "_d" indicates disassembler, "a_" means only assembler is available. +> Note that "ad" in the first column means both assembler and disassembler are offered by a corresponding plugin. +> "_d" indicates disassembler, "a_" means only assembler is available. diff --git a/src/tools/rz-ax/intro.md b/src/tools/rz-ax/intro.md index 5c5e0d44..ae3f456c 100644 --- a/src/tools/rz-ax/intro.md +++ b/src/tools/rz-ax/intro.md @@ -1,16 +1,20 @@ # Rz-ax -The `rz-ax` utility comes with the rizin framework and aims to be a minimalistic expression evaluator for the shell. 
It is useful for making base conversions between floating point values, hexadecimal representations, hexpair strings to ascii, octal to integer. It supports endianness and can be used as a shell if no arguments are given. +The `rz-ax` utility comes with the Rizin framework and aims to be a minimalistic expression evaluator for the shell. +It is useful for making base conversions between floating point values, hexadecimal representations, hexpair strings +to ASCII, octal to integer. It supports endianness and can be used as a shell if no arguments are given. -This is the help message of rz-ax, this tool can be used in the command-line or interactively (reading the values from stdin), so it can be used as a multi-base calculator. +This is the help message of rz-ax. This tool can be used in the command-line or interactively +(reading the values from stdin), so it can be used as a multi-base calculator. -Inside rizin, the functionality of rz-ax is available under the `%` command. For example: +Inside Rizin, the functionality of rz-ax is available under the `%` command. For example: ``` [0x00000000]> % 3+4 ``` -As you can see, the numeric expressions can contain mathematical expressions like addition, subtraction, as well as group operations with parenthesis. +As you can see, the numeric expressions can contain mathematical expressions like addition, subtraction, +as well as group operations with parentheses. The syntax in which the numbers are represented define the base, for example: @@ -65,9 +69,7 @@ Usage: rz-ax [options] [expr ...] 
-r rz style output ; rz-ax -r 0x1234 -s hexstr -> raw ; rz-ax -s 43 4a 50 -S raw -> hexstr ; rz-ax -S < /bin/ls > ls.hex - -t Unix tstamp -> str ; rz-ax -t 1234567890 - -m MS-DOS tstamp -> str ; rz-ax -m 1234567890 - -W Win32 tstamp -> str ; rz-ax -W 1234567890 + -t tstamp -> str ; rz-ax -t 1234567890 -x hash string ; rz-ax -x linux osx -u units ; rz-ax -u 389289238 # 317.0M -w signed word ; rz-ax -w 16 0xffff @@ -76,39 +78,32 @@ Usage: rz-ax [options] [expr ...] ``` Some examples: + ``` $ rz-ax 3+0x80 0x83 -``` -``` + $ rz-ax 0x80+3 131 -``` -``` + $ echo 0x80+3 | rz-ax 131 -``` -``` + $ rz-ax -s 4142 AB -``` -``` + $ rz-ax -S AB 4142 -``` -``` + $ rz-ax -S < bin.foo ... -``` -``` + $ rz-ax -e 33 0x21000000 -``` -``` + $ rz-ax -e 0x21000000 33 -``` -``` + $ rz-ax -K 90203010 +--[0x10302090]---+ |Eo. . | diff --git a/src/tools/rz-bin/entrypoints.md b/src/tools/rz-bin/entrypoints.md index d50af7f1..a30af73f 100644 --- a/src/tools/rz-bin/entrypoints.md +++ b/src/tools/rz-bin/entrypoints.md @@ -1,16 +1,11 @@ ## Code Entrypoints -The `-e` option passed to rz-bin will show entrypoints for given binary. Two examples: -``` -$ rz-bin -e /bin/ls -[Entrypoints] -vaddr=0x00005310 paddr=0x00005310 baddr=0x00000000 laddr=0x00000000 haddr=0x00000018 type=program - -1 entrypoints +The `-e` option passed to rz-bin will show entrypoints for given binary. -$ rz-bin -er /bin/ls -fs symbols -f entry0 1 @ 0x00005310 -f entry0_haddr 1 @ 0x00000018 -s entry0 +``` +$ rz-bin -e /usr/bin/ls +[Entries] +vaddr paddr hvaddr haddr type +---------------------------------------------------- +0x00005880 0x00005880 0x00000018 0x00000018 program ``` diff --git a/src/tools/rz-bin/exports.md b/src/tools/rz-bin/exports.md index a99e9e2e..b3b0e84f 100644 --- a/src/tools/rz-bin/exports.md +++ b/src/tools/rz-bin/exports.md @@ -1,19 +1,18 @@ ## Exports rz-bin is able to find exports. 
For example: + ``` -$ rz-bin -E /usr/lib/librz_bin.so | head +$ rz-bin -E /usr/lib64/librz_bin.so | head +ERROR: Cannot determine entrypoint, using 0x00013ea0. [Exports] - -nth paddr vaddr bind type size lib name -――――――――――――――――――――――――――――――――――――――――――――――――――――― -210 0x000ae1f0 0x000ae1f0 GLOBAL FUNC 200 r_bin_java_print_exceptions_attr_summary -211 0x000afc90 0x000afc90 GLOBAL FUNC 135 r_bin_java_get_args -212 0x000b18e0 0x000b18e0 GLOBAL FUNC 35 r_bin_java_get_item_desc_from_bin_cp_list -213 0x00022d90 0x00022d90 GLOBAL FUNC 204 r_bin_class_add_method -214 0x000ae600 0x000ae600 GLOBAL FUNC 175 r_bin_java_print_fieldref_cp_summary -215 0x000ad880 0x000ad880 GLOBAL FUNC 144 r_bin_java_print_constant_value_attr_summary -216 0x000b7330 0x000b7330 GLOBAL FUNC 679 r_bin_java_print_element_value_summary -217 0x000af170 0x000af170 GLOBAL FUNC 65 r_bin_java_create_method_fq_str -218 0x00079b00 0x00079b00 GLOBAL FUNC 15 LZ4_createStreamDecode +nth paddr vaddr bind type size lib name +----------------------------------------------------------------------------------------------- +366 0x000f34f0 0x000f34f0 GLOBAL FUNC 94 rz_bin_dwarf_str_from_file +367 0x0009d020 0x0009d020 GLOBAL FUNC 85 reloc_targets_map_base_64 +368 0x000601b0 0x000601b0 GLOBAL FUNC 153 rz_bin_dex_resolve_class_by_idx +369 0x0001adc0 0x0001adc0 GLOBAL FUNC 70 rz_bin_reloc_size +370 0x00018580 0x00018580 GLOBAL FUNC 66 rz_bin_cur +371 0x000178c0 0x000178c0 GLOBAL FUNC 174 rz_bin_xtr_plugin_del +372 0x000e42a0 0x000e42a0 GLOBAL FUNC 33 rz_bin_dwarf_addr_free ``` diff --git a/src/tools/rz-bin/file_identification.md b/src/tools/rz-bin/file_identification.md index 351dd8e6..482d22d7 100644 --- a/src/tools/rz-bin/file_identification.md +++ b/src/tools/rz-bin/file_identification.md @@ -1,38 +1,51 @@ ## File Properties Identification -File type identification is done using `-I`. 
With this option, rz-bin prints information on a binary type, like its encoding, endianness, class, operating system: +File type identification is done using `-I`. With this option, rz-bin prints information about a binary, +such as its encoding, endianness, class, and operating system: + ``` -$ rz-bin -I /bin/ls +$ rz-bin -I /usr/bin/ls +[Info] arch x86 -binsz 128456 +cpu N/A +baddr 0x00000000 +binsz 0x00024280 bintype elf bits 64 -canary true class ELF64 -crypto false -endian little -havecode true +compiler N/A +dbg_file N/A +endian LE +hdr.csum N/A +guid N/A intrp /lib64/ld-linux-x86-64.so.2 +laddr 0x00000000 lang c -linenum false -lsyms false machine AMD x86-64 architecture maxopsz 16 minopsz 1 -nx true os linux +cc N/A pcalign 0 -pic true -relocs false -relro partial +relro full rpath NONE -static false -stripped true subsys linux +stripped true +crypto false +havecode true va true +sanitiz false +static false +linenum false +lsyms false +canary true +PIE true +RELROCS false +NX true ``` -To make rz-bin output information in format that the main program, rizin, can understand, pass `-Ir` option to it: +To make rz-bin output information in a format that the main program, Rizin, can understand, pass the `-Ir` option to it: + ``` $ rz-bin -Ir /bin/ls e cfg.bigendian=false diff --git a/src/tools/rz-bin/imports.md b/src/tools/rz-bin/imports.md index 637920ba..2c9f2838 100644 --- a/src/tools/rz-bin/imports.md +++ b/src/tools/rz-bin/imports.md @@ -1,34 +1,34 @@ ## Imports -rz-bin is able to find imported objects by an executable, as well as their offsets in its PLT. This information is useful, for example, to understand what external function is invoked by `call` instruction. Pass `-i` flag to rz-bin to get a list of imports. An example: +rz-bin is able to find objects imported by an executable, as well as their offsets in its PLT. +This information is useful, for example, to understand which external function is invoked by a `call` instruction. 
+Pass `-i` flag to rz-bin to get a list of imports. An example: ``` -$ rz-bin -i /bin/ls +$ rz-bin -i /usr/bin/ls [Imports] -nth vaddr bind type lib name -――――――――――――――――――――――――――――――――――――― - 1 0x000032e0 GLOBAL FUNC __ctype_toupper_loc - 2 0x000032f0 GLOBAL FUNC getenv - 3 0x00003300 GLOBAL FUNC sigprocmask - 4 0x00003310 GLOBAL FUNC __snprintf_chk - 5 0x00003320 GLOBAL FUNC raise - 6 0x00000000 GLOBAL FUNC free - 7 0x00003330 GLOBAL FUNC abort - 8 0x00003340 GLOBAL FUNC __errno_location - 9 0x00003350 GLOBAL FUNC strncmp - 10 0x00000000 WEAK NOTYPE _ITM_deregisterTMCloneTable - 11 0x00003360 GLOBAL FUNC localtime_r - 12 0x00003370 GLOBAL FUNC _exit - 13 0x00003380 GLOBAL FUNC strcpy - 14 0x00003390 GLOBAL FUNC __fpending - 15 0x000033a0 GLOBAL FUNC isatty - 16 0x000033b0 GLOBAL FUNC sigaction - 17 0x000033c0 GLOBAL FUNC iswcntrl - 18 0x000033d0 GLOBAL FUNC wcswidth - 19 0x000033e0 GLOBAL FUNC localeconv - 20 0x000033f0 GLOBAL FUNC mbstowcs - 21 0x00003400 GLOBAL FUNC readlink +nth vaddr bind type lib name +------------------------------------------------------------- +1 0x000036a0 GLOBAL FUNC __ctype_toupper_loc +2 0x000036b0 GLOBAL FUNC getenv +3 0x000036c0 GLOBAL FUNC sigprocmask +4 0x000036d0 GLOBAL FUNC __snprintf_chk +5 0x000036e0 GLOBAL FUNC raise +6 ---------- GLOBAL FUNC __libc_start_main +7 0x000036f0 GLOBAL FUNC abort +8 0x00003700 GLOBAL FUNC __errno_location +9 0x00003710 GLOBAL FUNC strncmp +10 ---------- WEAK NOTYPE _ITM_deregisterTMCloneTable +11 0x00003720 GLOBAL FUNC localtime_r +12 0x00003730 GLOBAL FUNC _exit +13 0x00003740 GLOBAL FUNC strcpy +14 0x00003750 GLOBAL FUNC __fpending +15 0x00003760 GLOBAL FUNC isatty +16 0x00003770 GLOBAL FUNC sigaction +17 0x00003780 GLOBAL FUNC iswcntrl +18 0x00003790 GLOBAL FUNC reallocarray +19 0x000037a0 GLOBAL FUNC localeconv +20 0x000037b0 GLOBAL FUNC faccessat +21 0x000037c0 GLOBAL FUNC readlink ... 
- ``` - diff --git a/src/tools/rz-bin/intro.md b/src/tools/rz-bin/intro.md index df163a0e..e4a5c743 100644 --- a/src/tools/rz-bin/intro.md +++ b/src/tools/rz-bin/intro.md @@ -1,61 +1,85 @@ # Rz-bin — Show Properties of a Binary -Rz-bin is a powerful tool to handle binaries, to get information on imports, sections, headers and other data. It can present this information in several formats accepted by other tools, including rizin itself. -rz-bin understands many file formats: Java CLASS, ELF, PE, Mach-O or any format supported by plugins, and it is able to obtain symbol import/exports, library dependencies, strings of data sections, xrefs, entrypoint address, sections, architecture type. +Rz-bin is a powerful tool to handle binaries, to get information on imports, sections, headers and other data. +It can present this information in several formats accepted by other tools, including Rizin itself. +rz-bin understands many file formats: Java CLASS, ELF, PE, Mach-O or any format supported by plugins, +and it is able to obtain symbol import/exports, library dependencies, strings of data sections, xrefs, +entrypoint address, sections, architecture type. ``` $ rz-bin -h -Usage: rz-bin [-AcdeEghHiIjlLMqrRsSvVxzZ] [-@ at] [-a arch] [-b bits] [-B addr] - [-C F:C:D] [-f str] [-m addr] [-n str] [-N m:M] [-P[-P] pdb] - [-o str] [-O str] [-k query] [-D lang symname] | file - -@ [addr] show section, symbol or import at addr - -A list sub-binaries and their arch-bits pairs - -a [arch] set arch (x86, arm, .. or _) - -b [bits] set bits (32, 64 ...) 
- -B [addr] override base address (pie bins) - -c list classes - -C [fmt:C:D] create [elf,mach0,pe] with Code and Data hexpairs (see -a) - -d show debug/dwarf information - -D lang name demangle symbol name (-D all for bin.demangle=true) - -e entrypoint - -E globally exportable symbols - -f [str] select sub-bin named str - -F [binfmt] force to use that bin plugin (ignore header check) - -g same as -SMZIHVResizcld (show all info) - -G [addr] load address . offset to header - -h this help message - -H header fields - -i imports (symbols imported from libraries) - -I binary info - -j output in json - -k [sdb-query] run sdb query. for example: '*' - -K [algo] calculate checksums (md5, sha1, ..) - -l linked libraries - -L [plugin] list supported bin plugins or plugin details - -m [addr] show source line at addr - -M main (show address of main symbol) - -n [str] show section, symbol or import named str - -N [min:max] force min:max number of chars per string (see -z and -zz) - -o [str] output file/folder for write operations (out by default) - -O [str] write/extract operations (-O help) - -p show physical addresses - -P show debug/pdb information - -PP download pdb file for binary - -q be quiet, just show fewer data - -qq show less info (no offset/size for -z for ex.) - -Q show load address used by dlopen (non-aslr libs) - -r rizin output - -R relocations - -s symbols - -S sections - -u unfiltered (no rename duplicated symbols/sections) - -v display version and quit +Usage: rz-bin [-AcdeEghHiIjlLMqrRsSUvVxzZ] [-@ at] [-a arch] [-b bits] [-B addr] + [-C F:C:D] [-f str] [-m addr] [-n str] [-N m:M] [-P pdb] + [-o str] [-O str] [-k query] [-D lang symname] file + -@ [addr] Show section, symbol, or import at the given address + -A List sub-binaries and their arch-bits pairs + -a [arch] Set arch (x86, arm, .. or _) + -b [bits] Set bits (32, 64 ...) 
+ -B [addr] Override base address (pie bins) + -c List classes + -cc List classes in header format + -C [fmt:C:D] Create [elf,mach0,pe] with Code and Data hexpairs (see -a) + -d Show debug/dwarf information + -dd Load debug/dwarf information from debuginfod server + -D lang name Demangle symbol name (-D all for bin.demangle=true) + -e Entrypoint + -ee Constructor/destructor entrypoints + -E Globally exportable symbols + -f [str] Select sub-bin named str + -F [binfmt] Force to use that bin plugin (ignore header check) + -g Same as -SMZIHVResizcld -SS -SSS -ee (show all info) + -G [addr] Load address . offset to header + -h Show this help + -H Header fields + -i Import (symbols imported from libraries) + -I Binary info + -j Output in JSON + -k [sdb-query] Run sdb query. for example: '*' + -K [algo] Calculate checksums (md5, sha1, ..) + -l Linked libraries + -L [plugin] List supported bin plugins or plugin details + -m [addr] Show source line at addr + -M Main (show address of main symbol) + -n [str] Show section, symbol or import named str + -N [min:max] Force min:max number of chars per string (see -z and -zz) + -o [str] Output file/folder for write operations (out by default) + -O [str] Write/extract operations (-O help) + -p Show physical addresses + -P Show debug/pdb information + -PP Download pdb file for binary + -q Quiet mode, just show fewer data + -qq Show less info (no offset/size for -z for ex.) + -Q Show load address used by dlopen (non-aslr libs) + -r Show output in rizin format + -R Show relocations + -s Symbols + -S Sections + -SS Segments + -SSS Sections mapping to segments + -T Display file signature + -u Unfiltered (no rename duplicated symbols/sections) + -U Resources + -v Show version information -V Show binary version information - -x extract bins contained in file - -X [fmt] [f] .. 
package in fat or zip the given files and bins contained in file - -z strings (from data section) - -zz strings (from raw bins [e bin.rawstr=1]) - -zzz dump raw strings to stdout (for huge files) - -Z guess size of binary program -...... + -w Display try/catch blocks + -x Extract bins contained in file + -X [fmt] [f] .. Package in fat or zip the given files and bins contained in file + -Y [fw file] Calculate all the possibles base address candidates of a firmware bin + -z Show strings (from data section) + -zz Show strings (from raw strings from bin) + -zzz Dump raw strings to stdout (for huge files) + -Z Guess size of binary program +Environment: + RZ_NOPLUGINS: # do not load shared plugins (speedup loading) + RZ_BIN_LANG: e bin.lang # assume lang for demangling + RZ_BIN_DEMANGLE=0:e bin.demangle # do not demangle symbols + RZ_BIN_MAXSTRBUF: e str.search.buffer_size # specify maximum buffer size + RZ_BIN_STRFILTER: e bin.str.filter # rizin -qc 'e bin.str.filter=??' - + RZ_BIN_STRPURGE: e bin.str.purge # try to purge false positives + RZ_BIN_DEBASE64: e bin.debase64 # try to debase64 all strings + RZ_BIN_PDBSERVER: e pdb.server # use alternative PDB server + RZ_BIN_SYMSTORE: e pdb.symstore # path to downstream symbol store + RZ_BIN_PREFIX: e bin.prefix # prefix symbols/sections/relocs with a specific string + RZ_BIN_DEBUGINFOD_URLS: e bin.dbginfo.debuginfod_urls # use alternative debuginfod server + RZ_CONFIG: ``` diff --git a/src/tools/rz-bin/libraries.md b/src/tools/rz-bin/libraries.md index 3bcf45ac..f869d104 100644 --- a/src/tools/rz-bin/libraries.md +++ b/src/tools/rz-bin/libraries.md @@ -1,69 +1,69 @@ ## List Libraries rz-bin can list libraries used by a binary with the `-l` option: + ``` $ rz-bin -l `which rizin` -[Linked libraries] -librz_core.so -librz_parse.so -librz_search.so -librz_cons.so -librz_config.so -librz_bin.so -librz_debug.so -librz_analysis.so -librz_reg.so -librz_bp.so -librz_io.so -librz_fs.so -librz_asm.so -librz_syscall.so -librz_hash.so 
-librz_magic.so -librz_flag.so -librz_egg.so -librz_crypto.so -librz_util.so -libpthread.so.0 +rz-bin -l `which rizin` +[Libs] +library +------------------ +librz_util.so.0.7 +librz_main.so.0.7 libc.so.6 -22 libraries ``` -Lets check the output with `ldd` command: + +Let's check the output with `ldd` command: + ``` $ ldd `which rizin` -linux-vdso.so.1 (0x00007fffba38e000) -librz_core.so => /usr/lib64/librz_core.so (0x00007f94b4678000) -librz_parse.so => /usr/lib64/librz_parse.so (0x00007f94b4425000) -librz_search.so => /usr/lib64/librz_search.so (0x00007f94b421f000) -librz_cons.so => /usr/lib64/librz_cons.so (0x00007f94b4000000) -librz_config.so => /usr/lib64/librz_config.so (0x00007f94b3dfa000) -librz_bin.so => /usr/lib64/librz_bin.so (0x00007f94b3afd000) -librz_debug.so => /usr/lib64/librz_debug.so (0x00007f94b38d2000) -librz_analysis.so => /usr/lib64/librz_analysis.so (0x00007f94b2fbd000) -librz_reg.so => /usr/lib64/librz_reg.so (0x00007f94b2db4000) -librz_bp.so => /usr/lib64/librz_bp.so (0x00007f94b2baf000) -librz_io.so => /usr/lib64/librz_io.so (0x00007f94b2944000) -librz_fs.so => /usr/lib64/librz_fs.so (0x00007f94b270e000) -librz_asm.so => /usr/lib64/librz_asm.so (0x00007f94b1c69000) -librz_syscall.so => /usr/lib64/librz_syscall.so (0x00007f94b1a63000) -librz_hash.so => /usr/lib64/librz_hash.so (0x00007f94b185a000) -librz_magic.so => /usr/lib64/librz_magic.so (0x00007f94b164d000) -librz_flag.so => /usr/lib64/librz_flag.so (0x00007f94b1446000) -librz_egg.so => /usr/lib64/librz_egg.so (0x00007f94b1236000) -librz_crypto.so => /usr/lib64/librz_crypto.so (0x00007f94b1016000) -librz_util.so => /usr/lib64/librz_util.so (0x00007f94b0d35000) -libpthread.so.0 => /lib64/libpthread.so.0 (0x00007f94b0b15000) -libc.so.6 => /lib64/libc.so.6 (0x00007f94b074d000) -librz_lang.so => /usr/lib64/librz_lang.so (0x00007f94b0546000) -librz_socket.so => /usr/lib64/librz_socket.so (0x00007f94b0339000) -libm.so.6 => /lib64/libm.so.6 (0x00007f94affaf000) -libdl.so.2 => /lib64/libdl.so.2 
(0x00007f94afdab000) -/lib64/ld-linux-x86-64.so.2 (0x00007f94b4c79000) -libssl.so.1.0.0 => /usr/lib64/libssl.so.1.0.0 (0x00007f94afb3c000) -libcrypto.so.1.0.0 => /usr/lib64/libcrypto.so.1.0.0 (0x00007f94af702000) -libutil.so.1 => /lib64/libutil.so.1 (0x00007f94af4ff000) -libz.so.1 => /lib64/libz.so.1 (0x00007f94af2e8000) + linux-vdso.so.1 (0x00007ffe302dd000) + librz_util.so.0.7 => /usr/lib64/librz_util.so.0.7 (0x00007f6bea740000) + librz_main.so.0.7 => /usr/lib64/librz_main.so.0.7 (0x00007f6bea705000) + libc.so.6 => /usr/lib64/libc.so.6 (0x00007f6bea531000) + libm.so.6 => /usr/lib64/libm.so.6 (0x00007f6bea486000) + libpcre2-8.so.0 => /usr/lib64/libpcre2-8.so.0 (0x00007f6bea3e8000) + libz.so.1 => /usr/lib64/libz.so.1 (0x00007f6bea3ce000) + liblzma.so.5 => /usr/lib64/liblzma.so.5 (0x00007f6bea39e000) + libcrypto.so.3 => /usr/lib64/libcrypto.so.3 (0x00007f6be9ee7000) + librz_demangler.so.0.7 => /usr/lib64/librz_demangler.so.0.7 (0x00007f6be9eb7000) + librz_socket.so.0.7 => /usr/lib64/librz_socket.so.0.7 (0x00007f6be9ea5000) + librz_flag.so.0.7 => /usr/lib64/librz_flag.so.0.7 (0x00007f6be9e99000) + librz_cons.so.0.7 => /usr/lib64/librz_cons.so.0.7 (0x00007f6be9e6f000) + librz_hash.so.0.7 => /usr/lib64/librz_hash.so.0.7 (0x00007f6be9e3c000) + librz_crypto.so.0.7 => /usr/lib64/librz_crypto.so.0.7 (0x00007f6be9e27000) + librz_il.so.0.7 => /usr/lib64/librz_il.so.0.7 (0x00007f6be9df9000) + librz_io.so.0.7 => /usr/lib64/librz_io.so.0.7 (0x00007f6be9dab000) + librz_reg.so.0.7 => /usr/lib64/librz_reg.so.0.7 (0x00007f6be9d9f000) + librz_bp.so.0.7 => /usr/lib64/librz_bp.so.0.7 (0x00007f6be9d96000) + librz_syscall.so.0.7 => /usr/lib64/librz_syscall.so.0.7 (0x00007f6be9d8d000) + librz_parse.so.0.7 => /usr/lib64/librz_parse.so.0.7 (0x00007f6be9d68000) + librz_asm.so.0.7 => /usr/lib64/librz_asm.so.0.7 (0x00007f6be9b2b000) + librz_egg.so.0.7 => /usr/lib64/librz_egg.so.0.7 (0x00007f6be9b18000) + librz_search.so.0.7 => /usr/lib64/librz_search.so.0.7 (0x00007f6be9b0f000) + 
librz_analysis.so.0.7 => /usr/lib64/librz_analysis.so.0.7 (0x00007f6be9876000) + librz_debug.so.0.7 => /usr/lib64/librz_debug.so.0.7 (0x00007f6be9822000) + librz_config.so.0.7 => /usr/lib64/librz_config.so.0.7 (0x00007f6be981a000) + librz_bin.so.0.7 => /usr/lib64/librz_bin.so.0.7 (0x00007f6be96ad000) + librz_sign.so.0.7 => /usr/lib64/librz_sign.so.0.7 (0x00007f6be969d000) + librz_core.so.0.7 => /usr/lib64/librz_core.so.0.7 (0x00007f6be93cb000) + librz_diff.so.0.7 => /usr/lib64/librz_diff.so.0.7 (0x00007f6be93c0000) + /lib64/ld-linux-x86-64.so.2 (0x00007f6bea860000) + libssl.so.3 => /usr/lib64/libssl.so.3 (0x00007f6be92d4000) + libxxhash.so.0 => /usr/lib64/libxxhash.so.0 (0x00007f6be92c8000) + libzip.so.5 => /usr/lib64/libzip.so.5 (0x00007f6be92ab000) + libcapstone.so.5 => /usr/lib64/libcapstone.so.5 (0x00007f6be8ba2000) + librz_type.so.0.7 => /usr/lib64/librz_type.so.0.7 (0x00007f6be8ab0000) + librz_magic.so.0.7 => /usr/lib64/librz_magic.so.0.7 (0x00007f6be8aab000) + liblz4.so.1 => /usr/lib64/liblz4.so.1 (0x00007f6be8a84000) + libzstd.so.1 => /usr/lib64/libzstd.so.1 (0x00007f6be89c9000) + libmspack.so.0 => /usr/lib64/libmspack.so.0 (0x00007f6be89b4000) + librz_lang.so.0.7 => /usr/lib64/librz_lang.so.0.7 (0x00007f6be89aa000) + libbz2.so.1 => /usr/lib64/libbz2.so.1 (0x00007f6be8996000) + libmagic.so.1 => /usr/lib64/libmagic.so.1 (0x00007f6be896b000) ``` -If you compare the outputs of `rz-bin -l` and `ldd`, you will notice that rz-bin lists fewer libraries than `ldd`. The reason is that rz-bin does not follow and does not show dependencies of libraries. Only direct binary dependencies are shown. +If you compare the outputs of `rz-bin -l` and `ldd`, you will notice that rz-bin lists fewer libraries than `ldd`. +The reason is that rz-bin does not follow and does not show dependencies of libraries. +Only direct binary dependencies are shown. 
diff --git a/src/tools/rz-bin/operations.md b/src/tools/rz-bin/operations.md index fe083788..149e1a25 100644 --- a/src/tools/rz-bin/operations.md +++ b/src/tools/rz-bin/operations.md @@ -3,16 +3,13 @@ and one of the predefined strings to specify the required operation. All the pos ``` $ rz-bin -O h -Operation string: - Change Entrypoint: e/0x8048000 - Dump Symbols: d/s/1024 - Dump Section: d/S/.text - Resize Section: r/.data/1024 - Remove RPATH: R - Add Library: a/l/libfoo.dylib - Change Permissions: p/.data/rwx - Show LDID entitlements: C +Usage: iO [expression]: + d/s/1024 dump symbols + d/S/.text dump section + c show Codesign data + C show LDID entitlements ``` + Let's run some examples. Change the entry point to 0x8041111 of *somefile.bin*: diff --git a/src/tools/rz-bin/program_sections.md b/src/tools/rz-bin/program_sections.md index 3ad8cee8..890656a9 100644 --- a/src/tools/rz-bin/program_sections.md +++ b/src/tools/rz-bin/program_sections.md @@ -1,44 +1,48 @@ ## Program Sections -rz-bin called with the `-S` option gives complete information about the sections of an executable. For each section the index, offset, size, alignment, type and permissions, are shown. The next example demonstrates this: +rz-bin called with the `-S` option gives complete information about the sections of an executable. +For each section, the index, offset, size, alignment, type and permissions are shown. 
The next example demonstrates this: + ``` -$ rz-bin -S /bin/ls +$ rz-bin -S /usr/bin/ls [Sections] - -nth paddr size vaddr vsize perm name -――――――――――――――――――――――――――――――――――――――――――――――――――――― -00 0x00000000 0 0x00000000 0 ---- -01 0x00000238 28 0x00000238 28 -r-- .interp -02 0x00000254 32 0x00000254 32 -r-- .note.ABI_tag -03 0x00000278 176 0x00000278 176 -r-- .gnu.hash -04 0x00000328 3000 0x00000328 3000 -r-- .dynsym -05 0x00000ee0 1412 0x00000ee0 1412 -r-- .dynstr -06 0x00001464 250 0x00001464 250 -r-- .gnu.version -07 0x00001560 112 0x00001560 112 -r-- .gnu.version_r -08 0x000015d0 4944 0x000015d0 4944 -r-- .rela.dyn -09 0x00002920 2448 0x00002920 2448 -r-- .rela.plt -10 0x000032b0 23 0x000032b0 23 -r-x .init -11 0x000032d0 1648 0x000032d0 1648 -r-x .plt -12 0x00003940 24 0x00003940 24 -r-x .plt.got -13 0x00003960 73931 0x00003960 73931 -r-x .text -14 0x00015a2c 9 0x00015a2c 9 -r-x .fini -15 0x00015a40 20201 0x00015a40 20201 -r-- .rodata -16 0x0001a92c 2164 0x0001a92c 2164 -r-- .eh_frame_hdr -17 0x0001b1a0 11384 0x0001b1a0 11384 -r-- .eh_frame -18 0x0001e390 8 0x0021e390 8 -rw- .init_array -19 0x0001e398 8 0x0021e398 8 -rw- .fini_array -20 0x0001e3a0 2616 0x0021e3a0 2616 -rw- .data.rel.ro -21 0x0001edd8 480 0x0021edd8 480 -rw- .dynamic -22 0x0001efb8 56 0x0021efb8 56 -rw- .got -23 0x0001f000 840 0x0021f000 840 -rw- .got.plt -24 0x0001f360 616 0x0021f360 616 -rw- .data -25 0x0001f5c8 0 0x0021f5e0 4824 -rw- .bss -26 0x0001f5c8 232 0x00000000 232 ---- .shstrtab +paddr size vaddr vsize align perm name type flags +--------------------------------------------------------------------------------------------- +0x00000000 0x0 ---------- 0x0 0x0 ---- NULL +0x00000318 0x1c 0x00000318 0x1c 0x0 -r-- .interp PROGBITS alloc +0x00000338 0x50 0x00000338 0x50 0x0 -r-- .note.gnu.property NOTE alloc +0x00000388 0x20 0x00000388 0x20 0x0 -r-- .note.ABI-tag NOTE alloc +0x000003a8 0x98 0x000003a8 0x98 0x0 -r-- .gnu.hash GNU_HASH alloc +0x00000440 0xaf8 0x00000440 0xaf8 0x0 -r-- 
.dynsym DYNSYM alloc +0x00000f38 0x564 0x00000f38 0x564 0x0 -r-- .dynstr STRTAB alloc +0x0000149c 0xea 0x0000149c 0xea 0x0 -r-- .gnu.version VERSYM alloc +0x00001588 0xe0 0x00001588 0xe0 0x0 -r-- .gnu.version_r VERNEED alloc +0x00001668 0x150 0x00001668 0x150 0x0 -r-- .rela.dyn RELA alloc +0x000017b8 0x948 0x000017b8 0x948 0x0 -r-- .rela.plt RELA alloc,info +0x00002100 0x50 0x00002100 0x50 0x0 -r-- .relr.dyn NUM alloc +0x00003000 0x1b 0x00003000 0x1b 0x0 -r-x .init PROGBITS alloc,execute +0x00003020 0x640 0x00003020 0x640 0x0 -r-x .plt PROGBITS alloc,execute +0x00003660 0x40 0x00003660 0x40 0x0 -r-x .plt.got PROGBITS alloc,execute +0x000036a0 0x630 0x000036a0 0x630 0x0 -r-x .plt.sec PROGBITS alloc,execute +0x00003cd0 0x14972 0x00003cd0 0x14972 0x0 -r-x .text PROGBITS alloc,execute +0x00018644 0xd 0x00018644 0xd 0x0 -r-x .fini PROGBITS alloc,execute +0x00019000 0x5453 0x00019000 0x5453 0x0 -r-- .rodata PROGBITS alloc +0x0001e454 0x9b4 0x0001e454 0x9b4 0x0 -r-- .eh_frame_hdr PROGBITS alloc +0x0001ee08 0x3348 0x0001ee08 0x3348 0x0 -r-- .eh_frame PROGBITS alloc +0x00022f50 0x8 0x00023f50 0x8 0x0 -rw- .init_array INIT_ARRAY write,alloc +0x00022f58 0x8 0x00023f58 0x8 0x0 -rw- .fini_array FINI_ARRAY write,alloc +0x00022f60 0xaf8 0x00023f60 0xaf8 0x0 -rw- .data.rel.ro PROGBITS write,alloc +0x00023a58 0x220 0x00024a58 0x220 0x0 -rw- .dynamic DYNAMIC write,alloc +0x00023c78 0x370 0x00024c78 0x370 0x0 -rw- .got PROGBITS write,alloc +0x00024000 0x280 0x00025000 0x280 0x0 -rw- .data PROGBITS write,alloc +0x00024280 0x0 0x00025280 0x12d8 0x0 -rw- .bss NOBITS write,alloc +0x00024280 0x105 ---------- 0x105 0x0 ---- .shstrtab STRTAB ``` With the `-Sr` option, rz-bin will flag the start/end of every section, and will pass the rest of information as a comment. + ``` -$ rz-bin -Sr /bin/ls | head +$ rz-bin -Sr /usr/bin/ls | head fs sections "f section. 
1 0x00000000" "f section..interp 1 0x000002a8" diff --git a/src/tools/rz-bin/strings.md b/src/tools/rz-bin/strings.md index 1168c09f..fcd82aca 100644 --- a/src/tools/rz-bin/strings.md +++ b/src/tools/rz-bin/strings.md @@ -1,28 +1,27 @@ ## Strings -The `-z` option is used to list readable strings found in the .rodata section of ELF binaries, or the .text section of PE files. Example: +The `-z` option is used to list readable strings found in the .rodata section of ELF binaries, or the .text section +of PE files. Example: ``` -$ rz-bin -z /bin/ls | head +$ rz-bin -z /usr/bin/ls | head [Strings] -nth paddr vaddr len size section type string -――――――――――――――――――――――――――――――――――――――――――――――――――――――― -000 0x000160f8 0x000160f8 11 12 (.rodata) ascii dev_ino_pop -001 0x00016188 0x00016188 10 11 (.rodata) ascii sort_files -002 0x00016193 0x00016193 6 7 (.rodata) ascii posix- -003 0x0001619a 0x0001619a 4 5 (.rodata) ascii main -004 0x00016250 0x00016250 10 11 (.rodata) ascii ?pcdb-lswd -005 0x00016260 0x00016260 65 66 (.rodata) ascii # Configuration file for dircolors, a utility to help you set the -006 0x000162a2 0x000162a2 72 73 (.rodata) ascii # LS_COLORS environment variable used by GNU ls with the --color option. -007 0x000162eb 0x000162eb 56 57 (.rodata) ascii # Copyright (C) 1996-2018 Free Software Foundation, Inc. -008 0x00016324 0x00016324 70 71 (.rodata) ascii # Copying and distribution of this file, with or without modification, -009 0x0001636b 0x0001636b 76 77 (.rodata) ascii # are permitted provided the copyright notice and this notice are preserved. +nth paddr vaddr len size section type string +------------------------------------------------------- +0 0x00019007 0x00019007 5 6 .rodata ascii =fff? 
+1 0x00019630 0x00019630 11 12 .rodata ascii dev_ino_pop +2 0x000196a8 0x000196a8 10 11 .rodata ascii sort_files +3 0x000196b3 0x000196b3 6 7 .rodata ascii posix- +4 0x000196ba 0x000196ba 4 5 .rodata ascii main +5 0x00019790 0x00019790 10 11 .rodata ascii ?pcdb-lswd +6 0x000197a0 0x000197a0 65 66 .rodata ascii # Configuration file for dircolors, a utility to help you set the ``` With the `-zr` option, this information is represented as a rizin commands list. It can be used in a rizin session to automatically create a flag space called "strings" pre-populated with flags for all strings found by rz-bin. Furthermore, this script will mark corresponding byte ranges as strings instead of code. ``` -$ rz-bin -zr /bin/ls | head + +$ rz-bin -zr /usr/bin/ls | head fs stringsf str.dev_ino_pop 12 @ 0x000160f8 Cs 12 @ 0x000160f8 f str.sort_files 11 @ 0x00016188 diff --git a/src/tools/rz-bin/symbols.md b/src/tools/rz-bin/symbols.md index 159b4c87..9dab778a 100644 --- a/src/tools/rz-bin/symbols.md +++ b/src/tools/rz-bin/symbols.md @@ -3,26 +3,24 @@ With rz-bin, the generated symbols list format is similar to the imports list. 
Use the `-s` option to get it: ``` -rz-bin -s /bin/ls | head +$ rz-bin -s /usr/bin/ls | head [Symbols] - -nth paddr vaddr bind type size lib name -―――――――――――――――――――――――――――――――――――――――――――――――――――――― -110 0x000150a0 0x000150a0 GLOBAL FUNC 56 _obstack_allocated_p -111 0x0001f600 0x0021f600 GLOBAL OBJ 8 program_name -112 0x0001f620 0x0021f620 GLOBAL OBJ 8 stderr -113 0x00014f90 0x00014f90 GLOBAL FUNC 21 _obstack_begin_1 -114 0x0001f600 0x0021f600 WEAK OBJ 8 program_invocation_name -115 0x0001f5c0 0x0021f5c0 GLOBAL OBJ 8 alloc_failed_handler -116 0x0001f5f8 0x0021f5f8 GLOBAL OBJ 8 optarg -117 0x0001f5e8 0x0021f5e8 GLOBAL OBJ 8 stdout -118 0x0001f5e0 0x0021f5e0 GLOBAL OBJ 8 program_short_name +nth paddr vaddr bind type size lib name +--------------------------------------------------------------------------------- +104 ---------- 0x00025280 GLOBAL OBJ 8 __progname +105 ---------- 0x00025290 GLOBAL OBJ 4 optind +107 ---------- 0x000252a8 WEAK OBJ 8 program_invocation_name +108 ---------- 0x000252a8 GLOBAL OBJ 8 __progname_full +109 0x00024200 0x00025200 GLOBAL OBJ 8 obstack_alloc_failed_handler +110 ---------- 0x000252c0 GLOBAL OBJ 8 stderr +111 ---------- 0x00025280 WEAK OBJ 8 program_invocation_short_name ``` -With the `-sr` option rz-bin produces a rizin script instead. It can later be passed to the core to automatically flag all symbols and to define corresponding byte ranges as functions and data blocks. +With the `-sr` option rz-bin produces a rizin script instead. It can later be passed to the core to automatically +flag all symbols and to define corresponding byte ranges as functions and data blocks. 
``` -$ rz-bin -sr /bin/ls | head +$ rz-bin -sr /usr/bin/ls | head fs symbols f sym.obstack_allocated_p 56 0x000150a0 f sym.program_invocation_name 8 0x0021f600 diff --git a/src/tools/rz-diff/binary_diffing.md b/src/tools/rz-diff/binary_diffing.md index 0301c58a..3e0eb6a6 100644 --- a/src/tools/rz-diff/binary_diffing.md +++ b/src/tools/rz-diff/binary_diffing.md @@ -4,7 +4,8 @@ For bulk processing, you may want to have a higher-level overview of differences. -The `-d` option serves to calculate the distance between the two binaries using either myers algorithm or the levenshtein algorithm. +The `-d` option serves to calculate the distance between the two binaries using either the Myers algorithm or +the Levenshtein algorithm. ``` -d --------> myers (myers algorithm) @@ -13,33 +14,34 @@ The `-d` option serves to calculate the distance between the two binaries using ### Myers algorithm: -In the [Myers](https://epubs.siam.org/doi/10.1137/S0097539794264810) algorithm for edit distance, the cost of an insertion or deletion is 1 and the cost of a replacement is 2. -The theorem leads directly to an O(k) algorithm for incrementally computing a new solution from an old one, as contrasts the O(k2 ) time required to compute a solution from scratch. -Thus the algorithm performs well when the two strings are similar. +In the [Myers](https://epubs.siam.org/doi/10.1137/S0097539794264810) algorithm for edit distance, the cost of +an insertion or deletion is 1 and the cost of a replacement is 2. +The theorem leads directly to an O(k) algorithm for incrementally computing a new solution from an old one, +in contrast to the O(k^2) time required to compute a solution from scratch. +Thus, the algorithm performs well when the two strings are similar. 
-`rz-diff -d myers /bin/true /bin/false` - -output: -``` -similarity: 0.974 -distance: 2046 +```shell +$ rz-diff -d myers /usr/bin/true /usr/bin/false +similarity: 0.997 +distance: 242 ``` ### Levenshtein distance: -[Levenshtein](https://en.wikipedia.org/wiki/Levenshtein_distance) distance is a string metric for measuring the difference between two sequences. Informally, the Levenshtein distance between two words is the minimum number of single-character edits (insertions, deletions or substitutions) required to change one word into the other. - -`rz-diff -d leven /bin/true /bin/false` +[Levenshtein](https://en.wikipedia.org/wiki/Levenshtein_distance) distance is a string metric for measuring +the difference between two sequences. Informally, the Levenshtein distance between two words is the minimum number +of single-character edits (insertions, deletions or substitutions) required to change one word into the other. -output: -``` -similarity: 0.974 -distance: 2046 +```shell +$ rz-diff -d leven /usr/bin/true /usr/bin/false +similarity: 0.997 +distance: 130 ``` ## Hexadecimal Diffing: -`-H` The hexadecimal displays the hexdump of file0 vs file1 in a side-by-side window. Navigational keys allows easily parsing through the hexdump of the files individually. +`-H` The hexadecimal mode displays the hexdump of file0 vs file1 in a side-by-side window. Navigational keys make it easy +to move through the hexdump of each file individually. - `1` and `2` : to move to the next or previous page. - `Z` and `A` : allows parsing forward and backward through file0, byte by byte. @@ -54,25 +56,32 @@ distance: 2046 The bytes that differ are: `rz-diff -H /bin/true /bin/false` ``` -.---------- [ 0 | 9958]( true )-------------------------------------------------------------------- [ 0 | 9958]( false )---------------------------------------------------------. 
-| 0 1 2 3 4 5 6 7 8 9 A B C D E F 0 1 2 3 4 5 6 7 8 9 A B C D E F | -|0x0000000000000000 | 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 | .ELF............ | 0x0000000000000000 | 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 | .ELF............ | -|0x0000000000000010 | 03 00 3e 00 01 00 00 00 10 26 00 00 00 00 00 00 | ..>......&...... | 0x0000000000000010 | 03 00 3e 00 01 00 00 00 20 26 00 00 00 00 00 00 | ..>..... &...... | -|0x0000000000000020 | 40 00 00 00 00 00 00 00 d8 91 00 00 00 00 00 00 | @............... | 0x0000000000000020 | 40 00 00 00 00 00 00 00 d8 91 00 00 00 00 00 00 | @............... | -|0x0000000000000030 | 00 00 00 00 40 00 38 00 0d 00 40 00 1e 00 1d 00 | ....@.8...@..... | 0x0000000000000030 | 00 00 00 00 40 00 38 00 0d 00 40 00 1e 00 1d 00 | ....@.8...@..... | -|0x0000000000000040 | 06 00 00 00 04 00 00 00 40 00 00 00 00 00 00 00 | ........@....... | 0x0000000000000040 | 06 00 00 00 04 00 00 00 40 00 00 00 00 00 00 00 | ........@....... | -|0x0000000000000050 | 40 00 00 00 00 00 00 00 40 00 00 00 00 00 00 00 | @.......@....... | 0x0000000000000050 | 40 00 00 00 00 00 00 00 40 00 00 00 00 00 00 00 | @.......@....... | -|0x0000000000000060 | d8 02 00 00 00 00 00 00 d8 02 00 00 00 00 00 00 | ................ | 0x0000000000000060 | d8 02 00 00 00 00 00 00 d8 02 00 00 00 00 00 00 | ................ | -|0x0000000000000070 | 08 00 00 00 00 00 00 00 03 00 00 00 04 00 00 00 | ................ | 0x0000000000000070 | 08 00 00 00 00 00 00 00 03 00 00 00 04 00 00 00 | ................ | -|0x0000000000000080 | 18 03 00 00 00 00 00 00 18 03 00 00 00 00 00 00 | ................ | 0x0000000000000080 | 18 03 00 00 00 00 00 00 18 03 00 00 00 00 00 00 | ................ | -|0x0000000000000090 | 18 03 00 00 00 00 00 00 1c 00 00 00 00 00 00 00 | ................ | 0x0000000000000090 | 18 03 00 00 00 00 00 00 1c 00 00 00 00 00 00 00 | ................ 
| +$ rz-bin -s /usr/bin/ls | head +[Symbols] +nth paddr vaddr bind type size lib name +--------------------------------------------------------------------------------- +104 ---------- 0x00025280 GLOBAL OBJ 8 __progname +105 ---------- 0x00025290 GLOBAL OBJ 4 optind +107 ---------- 0x000252a8 WEAK OBJ 8 program_invocation_name +┌─────────────────────────── [a8c8]( true )─────────────────────────────────────────────────────────────────────────── [a8c8]( false )─────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ 0 1 2 3 4 5 6 7 8 9 A B C D E F 0 1 2 3 4 5 6 7 8 9 A B C D E F │ +│ 0x0000000000000000 | 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 | .ELF............ | 0x0000000000000000 | 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 | .ELF............ | │ +│ 0x0000000000000010 | 03 00 3e 00 01 00 00 00 80 26 00 00 00 00 00 00 | ..>......&...... | 0x0000000000000010 | 03 00 3e 00 01 00 00 00 80 26 00 00 00 00 00 00 | ..>......&...... | │ +│ 0x0000000000000020 | 40 00 00 00 00 00 00 00 88 a1 00 00 00 00 00 00 | @............... | 0x0000000000000020 | 40 00 00 00 00 00 00 00 88 a1 00 00 00 00 00 00 | @............... | │ +│ 0x0000000000000030 | 00 00 00 00 40 00 38 00 0d 00 40 00 1d 00 1c 00 | ....@.8...@..... | 0x0000000000000030 | 00 00 00 00 40 00 38 00 0d 00 40 00 1d 00 1c 00 | ....@.8...@..... | │ +│ 0x0000000000000040 | 06 00 00 00 04 00 00 00 40 00 00 00 00 00 00 00 | ........@....... | 0x0000000000000040 | 06 00 00 00 04 00 00 00 40 00 00 00 00 00 00 00 | ........@....... | │ +│ 0x0000000000000050 | 40 00 00 00 00 00 00 00 40 00 00 00 00 00 00 00 | @.......@....... | 0x0000000000000050 | 40 00 00 00 00 00 00 00 40 00 00 00 00 00 00 00 | @.......@....... | │ +│ 0x0000000000000060 | d8 02 00 00 00 00 00 00 d8 02 00 00 00 00 00 00 | ................ | 0x0000000000000060 | d8 02 00 00 00 00 00 00 d8 02 00 00 00 00 00 00 | ................ 
| │ +│ 0x0000000000000070 | 08 00 00 00 00 00 00 00 03 00 00 00 04 00 00 00 | ................ | 0x0000000000000070 | 08 00 00 00 00 00 00 00 03 00 00 00 04 00 00 00 | ................ | │ +│ 0x0000000000000080 | 18 03 00 00 00 00 00 00 18 03 00 00 00 00 00 00 | ................ | 0x0000000000000080 | 18 03 00 00 00 00 00 00 18 03 00 00 00 00 00 00 | ................ | │ +│ 0x0000000000000090 | 18 03 00 00 00 00 00 00 1c 00 00 00 00 00 00 00 | ................ | 0x0000000000000090 | 18 03 00 00 00 00 00 00 1c 00 00 00 00 00 00 00 | ................ | │ ... -|0x00000000000002f0 | 30 9c 00 00 00 00 00 00 30 9c 00 00 00 00 00 00 | 0.......0....... | 0x00000000000002f0 | 30 9c 00 00 00 00 00 00 30 9c 00 00 00 00 00 00 | 0.......0....... | -|0x0000000000000300 | d0 03 00 00 00 00 00 00 d0 03 00 00 00 00 00 00 | ................ | 0x0000000000000300 | d0 03 00 00 00 00 00 00 d0 03 00 00 00 00 00 00 | ................ | -`------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------' - 1 2 -/+0x320 | Z A file0 +/-1 | C D file1 +/-1 | G B end/begin | N M next/prev | \//\ +/-16 | < > +/-1 | : seek +│ 0x0000000000000360 | 07 00 00 00 00 00 00 00 01 00 01 c0 04 00 00 00 | ................ | 0x0000000000000360 | 07 00 00 00 00 00 00 00 01 00 01 c0 04 00 00 00 | ................ | │ +│ 0x0000000000000370 | 19 00 00 00 00 00 00 00 02 00 01 c0 04 00 00 00 | ................ | 0x0000000000000370 | 19 00 00 00 00 00 00 00 02 00 01 c0 04 00 00 00 | ................ 
| │ +└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ + 1 2 -/+0x390 | Z A file0 +/-1 | C D file1 +/-1 | G B end/begin | N M next/prev | ᐯ ᐱ +/-16 | ᐸ ᐳ +/-1 | : seek ``` `-S` mode allows you to adjust the window size of the hexadecimal view to your preference. @@ -85,27 +94,28 @@ Example : `rz-diff -HS 120x20 /bin/true /bin/false` `t` option computes the difference between two files based on its type. ``` - -t --------> bytes (raw bytes in the files) - |----> lines (compares text files) - |----> functions (compares functions found in the files) - |----> classes (compares classes found in the files) - |----> command (compares command output returned when executed) - |----> entries (compares entries found in the files) - |----> fields (compares fields found in the files) - |----> graphs (compares 2 functions and outputs in graphviz/dot format) - |----> format (compares text files) - |----> imports (compares imports found in the files) - |----> libraries (compares libraries found in the files) - |----> sections (compares sections found in the files) - |----> strings (compares sections found in the files) - |----> symbols (compares symbols found in the files) - + -t [type] Compute the difference between two files based on its type: + bytes | compare raw bytes in the files (only for small files) + lines | compare text files + functions | compare functions found in the files + classes | compare classes found in the files + command | compare command output returned when executed in both files + | require -0 and -1 is optional + entries | compare entries found in the files + fields | compare fields found in the files + graphs | compare 2 functions and outputs in graphviz/dot format + | require -0 and -1 is optional + imports | compare imports found in the files + libraries | compare 
libraries found in the files + sections | compare sections found in the files + strings | compare strings found in the files + symbols | compare symbols found in the files ``` ### Diffing ASCII-text files: -` $ rz-diff -t lines genuine cracked` ``` +$ rz-diff -t lines genuine cracked --- genuine +++ cracked @@ -1,1 +1,1 @@ @@ -117,27 +127,83 @@ Example : `rz-diff -HS 120x20 /bin/true /bin/false` It this mode, it will give you three columns for all functions: "First file offset", "Percentage of matching" and "Second file offset". -` $ rz-diff -t functions /bin/true /bin/false` - ``` - sym.imp.__fprintf_chk 11 0x00000000000024e0 | MATCH (1.000000) | 0x00000000000024e0 11 sym.imp.__fprintf_chk - sym.imp.mbsinit 11 0x00000000000024f0 | MATCH (1.000000) | 0x00000000000024f0 11 sym.imp.mbsinit - sym.imp.iswprint 11 0x0000000000002500 | MATCH (1.000000) | 0x0000000000002500 11 sym.imp.iswprint - sym.imp.__ctype_b_loc 11 0x0000000000002510 | MATCH (1.000000) | 0x0000000000002510 11 sym.imp.__ctype_b_loc - fcn.00002640 34 0x0000000000002640 | UNMATCH (0.058824) | 0x0000000000002650 34 fcn.00002650 - fcn.00002700 840 0x0000000000002700 | UNMATCH (0.221163) | 0x0000000000002710 840 fcn.00002710 - fcn.00002b30 176 0x0000000000002b30 | UNMATCH (0.173077) | 0x0000000000002b40 176 fcn.00002b40 - fcn.00002bf0 208 0x0000000000002bf0 | SIMILAR (0.961538) | 0x0000000000002c00 208 fcn.00002c00 - fcn.00002cd0 4627 0x0000000000002cd0 | SIMILAR (0.993949) | 0x0000000000002ce0 4627 fcn.00002ce0 +rz-diff -t functions /usr/bin/true /usr/bin/false +.-----------------------------------------------------------------------------------------------------------------------------------. 
+| name0 | size0 | addr0 | type | similarity | addr1 | size1 | name1 | +)-----------------------------------------------------------------------------------------------------------------------------------( +| fcn.000022c0 | 10 | 0x000022c0 | COMPLETE | 1.000000 | 0x000022c0 | 10 | fcn.000022c0 | +| sym.imp.free | 10 | 0x000022d0 | COMPLETE | 1.000000 | 0x000022d0 | 10 | sym.imp.free | +| sym.imp.abort | 10 | 0x000022e0 | COMPLETE | 1.000000 | 0x000022e0 | 10 | sym.imp.abort | +| sym.imp.__errno_location | 10 | 0x000022f0 | COMPLETE | 1.000000 | 0x000022f0 | 10 | sym.imp.__errno_location | +| sym.imp.strncmp | 10 | 0x00002300 | COMPLETE | 1.000000 | 0x00002300 | 10 | sym.imp.strncmp | +| sym.imp._exit | 10 | 0x00002310 | COMPLETE | 1.000000 | 0x00002310 | 10 | sym.imp._exit | +| sym.imp.__fpending | 10 | 0x00002320 | COMPLETE | 1.000000 | 0x00002320 | 10 | sym.imp.__fpending | +| sym.imp.reallocarray | 10 | 0x00002330 | COMPLETE | 1.000000 | 0x00002330 | 10 | sym.imp.reallocarray | +| sym.imp.textdomain | 10 | 0x00002340 | COMPLETE | 1.000000 | 0x00002340 | 10 | sym.imp.textdomain | +| sym.imp.fclose | 10 | 0x00002350 | COMPLETE | 1.000000 | 0x00002350 | 10 | sym.imp.fclose | +| sym.imp.bindtextdomain | 10 | 0x00002360 | COMPLETE | 1.000000 | 0x00002360 | 10 | sym.imp.bindtextdomain | +| sym.imp.dcgettext | 10 | 0x00002370 | COMPLETE | 1.000000 | 0x00002370 | 10 | sym.imp.dcgettext | +| sym.imp.__ctype_get_mb_cur_max | 10 | 0x00002380 | COMPLETE | 1.000000 | 0x00002380 | 10 | sym.imp.__ctype_get_mb_cur_max | +| sym.imp.strlen | 10 | 0x00002390 | COMPLETE | 1.000000 | 0x00002390 | 10 | sym.imp.strlen | +| sym.imp.__stack_chk_fail | 10 | 0x000023a0 | COMPLETE | 1.000000 | 0x000023a0 | 10 | sym.imp.__stack_chk_fail | +| sym.imp.strrchr | 10 | 0x000023b0 | COMPLETE | 1.000000 | 0x000023b0 | 10 | sym.imp.strrchr | +| sym.imp.lseek | 10 | 0x000023c0 | COMPLETE | 1.000000 | 0x000023c0 | 10 | sym.imp.lseek | +| sym.imp.memset | 10 | 0x000023d0 | COMPLETE | 1.000000 | 
0x000023d0 | 10 | sym.imp.memset | +| sym.imp.mbrtoc32 | 10 | 0x000023e0 | COMPLETE | 1.000000 | 0x000023e0 | 10 | sym.imp.mbrtoc32 | +| sym.imp.memcmp | 10 | 0x000023f0 | COMPLETE | 1.000000 | 0x000023f0 | 10 | sym.imp.memcmp | +| sym.imp.fputs_unlocked | 10 | 0x00002400 | COMPLETE | 1.000000 | 0x00002400 | 10 | sym.imp.fputs_unlocked | +| sym.imp.calloc | 10 | 0x00002410 | COMPLETE | 1.000000 | 0x00002410 | 10 | sym.imp.calloc | +| sym.imp.strcmp | 10 | 0x00002420 | COMPLETE | 1.000000 | 0x00002420 | 10 | sym.imp.strcmp | +| sym.imp.fputc_unlocked | 10 | 0x00002430 | COMPLETE | 1.000000 | 0x00002430 | 10 | sym.imp.fputc_unlocked | +| sym.imp.memcpy | 10 | 0x00002440 | COMPLETE | 1.000000 | 0x00002440 | 10 | sym.imp.memcpy | +| sym.imp.fileno | 10 | 0x00002450 | COMPLETE | 1.000000 | 0x00002450 | 10 | sym.imp.fileno | +| sym.imp.malloc | 10 | 0x00002460 | COMPLETE | 1.000000 | 0x00002460 | 10 | sym.imp.malloc | +| sym.imp.nl_langinfo | 10 | 0x00002480 | COMPLETE | 1.000000 | 0x00002480 | 10 | sym.imp.nl_langinfo | +| sym.imp.__freading | 10 | 0x00002490 | COMPLETE | 1.000000 | 0x00002490 | 10 | sym.imp.__freading | +| sym.imp.realloc | 10 | 0x000024a0 | COMPLETE | 1.000000 | 0x000024a0 | 10 | sym.imp.realloc | +| sym.imp.setlocale | 10 | 0x000024b0 | COMPLETE | 1.000000 | 0x000024b0 | 10 | sym.imp.setlocale | +| sym.imp.__printf_chk | 10 | 0x000024c0 | COMPLETE | 1.000000 | 0x000024c0 | 10 | sym.imp.__printf_chk | +| sym.imp.error | 10 | 0x000024d0 | COMPLETE | 1.000000 | 0x000024d0 | 10 | sym.imp.error | +| sym.imp.exit | 10 | 0x00002500 | COMPLETE | 1.000000 | 0x00002500 | 10 | sym.imp.exit | +| sym.imp.fwrite | 10 | 0x00002510 | COMPLETE | 1.000000 | 0x00002510 | 10 | sym.imp.fwrite | +| sym.imp.__fprintf_chk | 10 | 0x00002520 | COMPLETE | 1.000000 | 0x00002520 | 10 | sym.imp.__fprintf_chk | +| sym.imp.mbsinit | 10 | 0x00002530 | COMPLETE | 1.000000 | 0x00002530 | 10 | sym.imp.mbsinit | +| sym.imp.iswprint | 10 | 0x00002540 | COMPLETE | 1.000000 | 0x00002540 | 
10 | sym.imp.iswprint | +| sym.imp.__ctype_b_loc | 10 | 0x00002550 | COMPLETE | 1.000000 | 0x00002550 | 10 | sym.imp.__ctype_b_loc | +| fcn.000026b0 | 34 | 0x000026b0 | COMPLETE | 1.000000 | 0x000026b0 | 34 | fcn.000026b0 | +| fcn.00002770 | 833 | 0x00002770 | PARTIAL | 0.979592 | 0x00002770 | 833 | fcn.00002770 | +| fcn.00002ba0 | 166 | 0x00002ba0 | PARTIAL | 0.993976 | 0x00002ba0 | 166 | fcn.00002ba0 | +| fcn.00002c50 | 79 | 0x00002c50 | PARTIAL | 0.987342 | 0x00002c50 | 79 | fcn.00002c50 | +| fcn.00002cb0 | 220 | 0x00002cb0 | PARTIAL | 0.972727 | 0x00002cb0 | 220 | fcn.00002cb0 | +| fcn.00002da0 | 5447 | 0x00002da0 | PARTIAL | 0.998348 | 0x00002da0 | 5447 | fcn.00002da0 | +| fcn.00004370 | 486 | 0x00004370 | COMPLETE | 1.000000 | 0x00004370 | 486 | fcn.00004370 | +| fcn.00004c20 | 120 | 0x00004c20 | COMPLETE | 1.000000 | 0x00004c20 | 120 | fcn.00004c20 | +| fcn.00005070 | 1092 | 0x00005070 | PARTIAL | 0.991758 | 0x00005070 | 1092 | fcn.00005070 | +| fcn.000055c0 | 210 | 0x000055c0 | COMPLETE | 1.000000 | 0x000055c0 | 210 | fcn.000055c0 | +| fcn.000057f0 | 28 | 0x000057f0 | COMPLETE | 1.000000 | 0x000057f0 | 28 | fcn.000057f0 | +| fcn.00005a80 | 241 | 0x00005a80 | COMPLETE | 1.000000 | 0x00005a80 | 241 | fcn.00005a80 | +| fcn.00005c20 | 54 | 0x00005c20 | COMPLETE | 1.000000 | 0x00005c20 | 54 | fcn.00005c20 | +| fcn.00005d20 | 56 | 0x00005d20 | PARTIAL | 0.964286 | 0x00005d20 | 56 | fcn.00005d20 | +| fcn.00005d60 | 93 | 0x00005d60 | COMPLETE | 1.000000 | 0x00005d60 | 93 | fcn.00005d60 | +| fcn.00005dc0 | 88 | 0x00005dc0 | COMPLETE | 1.000000 | 0x00005dc0 | 88 | fcn.00005dc0 | +| fcn.00005e20 | 124 | 0x00005e20 | COMPLETE | 1.000000 | 0x00005e20 | 124 | fcn.00005e20 | +| fcn.00005eb0 | 63 | 0x00005eb0 | COMPLETE | 1.000000 | 0x00005eb0 | 63 | fcn.00005eb0 | +| fcn.00005ef0 | 109 | 0x00005ef0 | COMPLETE | 1.000000 | 0x00005ef0 | 109 | fcn.00005ef0 | +| fcn.00005f70 | 54 | 0x00005f70 | PARTIAL | 0.962963 | 0x00005f70 | 54 | fcn.00005f70 | +| fcn.00005fb0 | 180 | 
0x00005fb0 | PARTIAL | 0.994444 | 0x00005fb0 | 180 | fcn.00005fb0 | +| fcn.00006070 | 116 | 0x00006070 | COMPLETE | 1.000000 | 0x00006070 | 116 | fcn.00006070 | +| fcn.000060f0 | 128 | 0x000060f0 | COMPLETE | 1.000000 | 0x000060f0 | 128 | fcn.000060f0 | +| fcn.000061b0 | 18 | 0x000061b0 | COMPLETE | 1.000000 | 0x000061b0 | 18 | fcn.000061b0 | +`-----------------------------------------------------------------------------------------------------------------------------------' ``` ### Diffing classes in binaries: -`rz-diff -t functions /bin/true /bin/false` - ``` ---- /bin/true -+++ /bin/false +rz-diff -t classes /usr/bin/true /usr/bin/false +--- /usr/bin/true ++++ /usr/bin/false ``` ### Commands @@ -145,73 +211,68 @@ It this mode, it will give you three columns for all functions: "First file offs ### Diffing entries in binaries -`rz-diff -t entries /bin/true /bin/false` - ``` ---- /bin/true -+++ /bin/false -@@ -1,3 +1,3 @@ --virt: 0x00000000000026f0 phys: 0x00000000000026f0 entry init --virt: 0x00000000000026b0 phys: 0x00000000000026b0 entry fini --virt: 0x0000000000002610 phys: 0x0000000000002610 entry program -+virt: 0x0000000000002700 phys: 0x0000000000002700 entry init -+virt: 0x00000000000026c0 phys: 0x00000000000026c0 entry fini -+virt: 0x0000000000002620 phys: 0x0000000000002620 entry program - +rz-diff -t entries /usr/bin/true /usr/bin/false +--- /usr/bin/true ++++ /usr/bin/false ``` ### Diffing fields in binaries: -`rz-diff -t fields /bin/true /bin/false ` - ``` ---- /bin/true -+++ /bin/false +rz-diff -t fields /usr/bin/true /usr/bin/false +--- /usr/bin/true ++++ /usr/bin/false ``` ### Diffing sections in binaries: -`rz-diff -t sections /bin/true /bin/false` - ``` - ---- /bin/true -+++ /bin/false +rz-diff -t sections /usr/bin/true /usr/bin/false +--- /usr/bin/true ++++ /usr/bin/false ``` ### Diffing strings in binaries: -`rz-diff -t strings /bin/true /bin/false` - ``` ---- /bin/true -+++ /bin/false -@@ -11,7 +11,7 @@ - Written by %s, %s, %s,\n%s, %s, %s, 
%s,\nand %s.\n +rz-diff -t strings /usr/bin/true /usr/bin/false +--- /usr/bin/true ++++ /usr/bin/false +@@ -10,7 +10,7 @@ + --help display this help and exit + + Written by %s, %s, %s, +%s, %s, %s, %s, +and %s. + Copyright %s %d Free Software Foundation, Inc. - --help display this help and exit\n -Exit with a status code indicating success. +Exit with a status code indicating failure. - Written by %s, %s, %s,\n%s, %s, %s, and %s.\n - Written by %s, %s, %s,\n%s, %s, and %s.\n + Written by %s, %s, %s, +%s, %s, %s, and %s. + + Written by %s, %s, %s, +%s, %s, and %s. + https://www.gnu.org/software/coreutils/ -@@ -51,7 +51,7 @@ - --help +@@ -59,6 +59,6 @@ ASCII POSIX + UTF-8 +false shell - %s\n\n - 8.30 + %s + + -true ``` ### Diffing symbols in binaries: -`rz-diff -t symbols /bin/true /bin/false` - ``` ---- /bin/true -+++ /bin/false +rz-diff -t symbols /usr/bin/true /usr/bin/false +--- /usr/bin/true ++++ /usr/bin/false ``` diff --git a/src/tools/rz-diff/intro.md b/src/tools/rz-diff/intro.md index 979c2a60..cce96686 100644 --- a/src/tools/rz-diff/intro.md +++ b/src/tools/rz-diff/intro.md @@ -5,43 +5,48 @@ ``` $ rz-diff -h Usage: rz-diff [options] - -a [arch] specify architecture plugin to use (x86, arm, ..) - -b [bits] specify register size for arch (16 (thumb), 32, 64, ..) - -d [algo] compute edit distance based on the choosen algorithm: - myers | Eugene W. Myers' O(ND) algorithm (no substitution) - leven | Levenshtein O(N^2) algorithm (with substitution) - -H hexadecimal visual mode - -h this help message - -j json output - -q quite output - -v show version information - -A compare virtual and physical addresses - -C disable colors - -T show timestamp information - -S [WxH] sets the width and height of the terminal for visual mode - -0 [cmd] input for file0 when option -t 'commands' is given. - the same value will be set for file1, if -1 is not set. - -1 [cmd] input for file1 when option -t 'commands' is given. 
- -t [type] compute the difference between two files based on its type: - bytes | compares raw bytes in the files (only for small files) - lines | compares text files - functions | compares functions found in the files - classes | compares classes found in the files - command | compares command output returned when executed in both files - | requires -0 and -1 is optional - entries | compares entries found in the files - fields | compares fields found in the files - graphs | compares 2 functions and outputs in graphviz/dot format - | requires -0 and -1 is optional - imports | compares imports found in the files - libraries | compares libraries found in the files - sections | compares sections found in the files - strings | compares strings found in the files - symbols | compares symbols found in the files - palette colors can be changed by adding the following lines - inside the $HOME/.rizinrc file - ec diff.unknown blue | offset color - ec diff.match green | match color - ec diff.unmatch red | mismatch color + -a [arch] Specify architecture plugin to use (x86, arm, ..) + -b [bits] Specify register size for arch (16 (thumb), 32, 64, ..) + -d [algo] Compute edit distance based on the chosen algorithm: + myers | Eugene W. Myers' O(ND) algorithm (no substitution) + leven | Levenshtein O(N^2) algorithm (with substitution) + ssdeep | Context triggered piecewise hashing comparison + -i Use command line arguments instead of files (only for -d) + -H Hexadecimal visual mode + -h Show this help + -j JSON output + -q Quite output + -V Show version information + -v Be more verbose (stderr output) + -e [k=v] Set an evaluable config variable + -A Compare virtual and physical addresses + -B Run 'aaa' when loading the bin + -C Disable colors + -T Show timestamp information + -S [WxH] Set the width and height of the terminal for visual mode + -0 [cmd] Input for file0 when option -t 'commands' is given. + The same value will be set for file1, if -1 is not set. 
+ -1 [cmd] Input for file1 when option -t 'commands' is given. + -t [type] Compute the difference between two files based on its type: + bytes | compare raw bytes in the files (only for small files) + lines | compare text files + functions | compare functions found in the files + classes | compare classes found in the files + command | compare command output returned when executed in both files + | require -0 and -1 is optional + entries | compare entries found in the files + fields | compare fields found in the files + graphs | compare 2 functions and outputs in graphviz/dot format + | require -0 and -1 is optional + imports | compare imports found in the files + libraries | compare libraries found in the files + sections | compare sections found in the files + strings | compare strings found in the files + symbols | compare symbols found in the files +palette colors can be changed by adding the following lines +inside the $HOME/.rizinrc file +ec diff.unknown blue | offset color +ec diff.match green | match color +ec diff.unmatch red | mismatch color ``` diff --git a/src/tools/rz-find/intro.md b/src/tools/rz-find/intro.md index 78a48257..bf31f6fe 100644 --- a/src/tools/rz-find/intro.md +++ b/src/tools/rz-find/intro.md @@ -4,78 +4,93 @@ ``` $ rz-find -h -Usage: rz-find [-mXnzZhqv] [-a align] [-b sz] [-f/t from/to] [-[e|s|S] str] [-x hex] -|file|dir .. - -a [align] only accept aligned hits - -b [size] set block size - -e [regex] search for regex matches (can be used multiple times) - -E [cmd] execute command on each match - -f [from] start searching from address 'from' - -h show this help - -i identify filetype (rizin -nqcpm file) - -j output in JSON - -m magic search, file-type carver - -M [str] set a binary mask to be applied on keywords - -n do not stop on read errors - -r print using rizin commands - -s [str] search for a specific string (can be used multiple times) - -S [str] search for a specific wide string (can be used multiple times). Assumes str is UTF-8. 
- -t [to] stop search at address 'to' - -q quiet - do not show headings (filenames) above matching contents (default for searching a single file) - -v print version and exit - -x [hex] search for hexpair string (909090) (can be used multiple times) - -X show hexdump of search results - -z search for zero-terminated strings - -Z show string found on each search hit +Usage: rz-find [-mXnzZhqv] [-a align] [-b sz] [-f/t from/to] [-[e|s|w|S|I] str] [-x hex] -|file|dir .. + -a [align] Only accept aligned hits + -b [size] Set block size + -e [regex] Search for regex matches (can be used multiple times) + -f [from] Start searching from address 'from' + -F [file] Read the contents of the file and use it as keyword + -h Show this help + -i Identify filetype (rizin -nqcpm file) + -j Output in JSON + -m Magic search, file-type carver + -M [str] Set a binary mask to be applied on keywords + -n Do not stop on read errors + -r Print using rizin commands + -s [str] Search for a specific string (can be used multiple times) + -w [str] Search for a specific wide string (can be used multiple times). Assumes str is UTF-8. + -I [str] Search for an entry in import table. + -S [str] Search for a symbol in symbol table. + -t [to] Stop search at address 'to' + -q Quiet - do not show headings (filenames) above matching contents (default for searching a single file) + -v Show version information + -x [hex] Search for hexpair string (909090) (can be used multiple times) + -X Show hexdump of search results + -z Search for zero-terminated strings + -Z Show string found on each search hit ``` That's how to use it, first we'll search for "lib" inside the `/bin/ls` binary. + ``` -$ rz-find -s lib /bin/ls -0x5f9 -0x675 -0x679 -... -$ +$ rz-find -s lib /usr/bin/ls +0x319 +0x11f3 +0x13b7 +0x1b5ea +0x1b792 ``` -Note that the output is pretty minimal, and shows the offsets where the string `lib` is found. We can then use this output to feed other tools. 
+ +Note that the output is pretty minimal, and shows the offsets where the string `lib` is found. We can then use +this output to feed other tools. Counting results: ``` -$ rz-find -s lib /bin/ls | wc -l +$ rz-find -s lib /usr/bin/ls | wc -l +5 ``` Displaying results with context: ``` -$ export F=/bin/ls +$ export F=/usr/bin/ls $ for a in `rz-find -s lib $F` ; do \ rizin -ns $a -qc'x 32' $F ; done -0x000005f9 6c69 622f 6479 6c64 .. lib/dyld........ -0x00000675 6c69 622f 6c69 6275 .. lib/libutil.dyli -0x00000679 6c69 6275 7469 6c2e .. libutil.dylib... -0x00000683 6c69 6200 000c 0000 .. lib......8...... -0x000006a5 6c69 622f 6c69 626e .. lib/libncurses.5 -0x000006a9 6c69 626e 6375 7273 .. libncurses.5.4.d -0x000006ba 6c69 6200 0000 0c00 .. lib.......8..... -0x000006dd 6c69 622f 6c69 6253 .. lib/libSystem.B. -0x000006e1 6c69 6253 7973 7465 .. libSystem.B.dyli -0x000006ef 6c69 6200 0000 0000 .. lib......&...... +- offset - 0 1 2 3 4 5 6 7 8 9 A B C D E F 0123456789ABCDEF +0x00000319 6c69 6236 342f 6c64 2d6c 696e 7578 2d78 lib64/ld-linux-x +0x00000329 3836 2d36 342e 736f 2e32 0000 0000 0004 86-64.so.2...... +- offset - 0 1 2 3 4 5 6 7 8 9 A B C D E F 0123456789ABCDEF +0x000011f3 6c69 6263 5f73 7461 7274 5f6d 6169 6e00 libc_start_main. +0x00001203 6973 7770 7269 6e74 0073 6967 7072 6f63 iswprint.sigproc +- offset - 0 1 2 3 4 5 6 7 8 9 A B C D E F 0123456789ABCDEF +0x000013b7 6c69 6263 2e73 6f2e 3600 474c 4942 435f libc.so.6.GLIBC_ +0x000013c7 4142 495f 4454 5f52 454c 5200 474c 4942 ABI_DT_RELR.GLIB +- offset - 0 1 2 3 4 5 6 7 8 9 A B C D E F 0123456789ABCDEF +0x0001b5ea 6c69 6273 2f00 5554 462d 3800 e280 9900 libs/.UTF-8..... 
+0x0001b5fa a1af 0022 00a1 0765 00e2 8098 0060 0073 ..."...e.....`.s +- offset - 0 1 2 3 4 5 6 7 8 9 A B C D E F 0123456789ABCDEF +0x0001b792 6c69 622f 7873 7472 746f 6c2e 6300 4153 lib/xstrtol.c.AS +0x0001b7a2 4349 4900 0000 6361 6e6e 6f74 2064 6574 CII...cannot det ``` -rz-find can also be used as a replacement of `file` to identify the mimetype of a file using the internal magic database of rizin. +rz-find can also be used as a replacement of `file` to identify the mimetype of a file using the internal magic +database of Rizin. ``` -$ rz-find -i /bin/ls -0x00000000 1 Mach-O +$ rz-find -i /usr/bin/ls +0x00000000 1 ELF 64-bit LSB shared object, x86-64, version 1 ``` -Also works as a `strings` replacement, similar to what you do with rz-bin -z, but without caring about parsing headers and obeying binary sections. +Also works as a `strings` replacement, similar to what you do with `rz-bin -z`, but without caring about parsing +headers and obeying binary sections. ``` -$ rz-find -z /bin/ls| grep http -0x000076e5 %http://www.apple.com/appleca/root.crl0\r -0x00007ae6 https://www.apple.com/appleca/0 -0x00007fa9 )http://www.apple.com/certificateauthority0 -0x000080ab $http://crl.apple.com/codesigning.crl0 +$ rz-find -z /usr/bin/ls | grep http +# https://wiki.xiph.org/MIME_Types_and_File_Extensions +# https://wiki.xiph.org/MIME_Types_and_File_Extensions +https://www.gnu.org/gethelp/ +https://www.gnu.org/software/coreutils/ +Report any translation bugs to +https://gnu.org/licenses/gpl.html ``` diff --git a/src/tools/rz-gg/rz-gg.md b/src/tools/rz-gg/rz-gg.md index 11687b05..2a0b5ea7 100644 --- a/src/tools/rz-gg/rz-gg.md +++ b/src/tools/rz-gg/rz-gg.md @@ -9,15 +9,17 @@ for x86, x86-64, and ARM. You can also access all the rz-gg commands from the rizin shell. They are present under `g`. See `g?` for more information about how to use them. 
-By default it will compile it's own `rz-gg` language, but you can also compile C -code using GCC or Clang shellcodes depending on the file extension. Lets create +By default, it compiles its own `rz-gg` language, but you can also compile C +code into shellcode using GCC or Clang, depending on the file extension. Let's create a C file called `helloworld.c`: + ```c int main() { write(1, "Hello World\n", 13); return 0; } ``` + ``` $ rz-gg -a x86 -b32 helloworld.c e900000000488d3516000000bf01000000b80400000248c7c20d0000000f0531c0c348656c6c6f20576f726c640a00 @@ -128,4 +130,4 @@ $ xxd helloworld | head -n 3 00000020: 2822 3133 3942 222c 202a 5b0a 3078 3535 ("139B", *[.0x55 ``` -You can set the output format to C, PE, ELF, Mach-O, raw, python or javascript. \ No newline at end of file +You can set the output format to C, PE, ELF, Mach-O, raw, Python or JavaScript. diff --git a/src/tools/rz-hash/encode.md b/src/tools/rz-hash/encode.md index ee9a2742..ce39cfd4 100644 --- a/src/tools/rz-hash/encode.md +++ b/src/tools/rz-hash/encode.md @@ -13,6 +13,7 @@ For example, to encode a string into base64 use the following line: ``` $ rz-hash -E base64 -s hello +0x00000000-0x00000005 base64: aGVsbG8= ``` You can decode it by using the -D flag instead of -E. 
@@ -23,7 +24,7 @@ For encrypting data check the crypto hash plugins: ``` $ rz-hash -L | grep "^E\|^_D" -ED____ aes-ecb MS-PL Karl Malbrain +ED____ aes-ecb LGPL3 Nettle project (algorithm implementation), pancake (plugin) ED____ aes-cbc LGPL-3 rakholiyajenish.07 ED____ blowfish LGPL3 kishorbhat ED____ cps2 LGPL-3 pancake,esanfelix,pof @@ -36,6 +37,7 @@ _D____ ror LGPL-3 pancake ED____ rot LGPL-3 pancake ED____ serpent-ecb LGPL-3 NicsTr ED____ xor LGPL-3 pancake +ED____ sm4-ecb LGPL-3 0xSh4dy ``` Here's an example usage to encrypt a string using rz-hash: diff --git a/src/tools/rz-hash/intro.md b/src/tools/rz-hash/intro.md index da2f2341..8902b406 100644 --- a/src/tools/rz-hash/intro.md +++ b/src/tools/rz-hash/intro.md @@ -1,6 +1,7 @@ # Rz-hash -The `rz-hash` tool can be used to compute checksums of files, disk devices or strings. By block or entirely using many different hash algorithms. +The `rz-hash` tool can be used to compute checksums of files, disk devices or strings, either by block or over the +entire input, using many different hash algorithms. This tool is also capable of doing some encoding/decoding operations like base64 and xor encoding. @@ -8,13 +9,16 @@ This is an example usage: ``` $ rz-hash -a md5 -s "hello world" +string: 0x00000000-0x0000000b md5: 5eb63bbbe01eeed093cb22bb8f5acdc3 ``` -Note that rz-hash also permits to read from files in a stream, so you don't need 4GB of ram to compute the hash of a 4GB file. +Note that rz-hash can also read files as a stream, so you don't need 4GB of RAM to compute the hash +of a 4GB file. ## Hashing by blocks -When doing forensics, it is useful to compute partial checksums. 
The reason for that is because you may want to split +a huge file into small portions that are easier to identify by contents or regions in the disk. This will spot the same hash for blocks containing the same contents. For example, if is filled with zeros. @@ -23,7 +27,17 @@ It can also be used to find which blocks have changed between more than one samp This can be useful when analyzing ram dumps from a virtual machine for example. Use this command for this: ``` -$ rz-hash -b 1M -B -a sha256 /bin/ls +$ rz-hash -b 1M -B -a sha256 /usr/bin/ls +/usr/bin/ls: 0x00000000-0x00000001 sha256: 620bfdaa346b088fb49998d92f19a7eaf6bfc2fb0aee015753966da1028cb731 +/usr/bin/ls: 0x00000001-0x00000002 sha256: a9f51566bd6705f7ea6ad54bb9deb449f795582d6529a0e22207b8981233ec58 +/usr/bin/ls: 0x00000002-0x00000003 sha256: 72dfcfb0c470ac255cde83fb8fe38de8a128188e03ea5ba5b2a93adbea1062fa +/usr/bin/ls: 0x00000003-0x00000004 sha256: f67ab10ad4e4c53121b6a5fe4da9c10ddee905b978d3788d2723d7bfacbe28a9 +/usr/bin/ls: 0x00000004-0x00000005 sha256: dbc1b4c900ffe48d575b5da5c638040125f65db0fe3e24494b76ea986457d986 +/usr/bin/ls: 0x00000005-0x00000006 sha256: 4bf5122f344554c53bde2ebb8cd2b7e3d1600ad631c385a5d7cce23c7785459a +/usr/bin/ls: 0x00000006-0x00000007 sha256: 4bf5122f344554c53bde2ebb8cd2b7e3d1600ad631c385a5d7cce23c7785459a +/usr/bin/ls: 0x00000007-0x00000008 sha256: 6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d +/usr/bin/ls: 0x00000008-0x00000009 sha256: 6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d +... ``` ## Hashing with rz-bin @@ -31,31 +45,67 @@ $ rz-hash -b 1M -B -a sha256 /bin/ls The rz-bin tool parses the binary headers of the files, but it also have the ability to use the rhash plugins to compute checksum of sections in the binary. 
``` -$ rz-bin -K md5 -S /bin/ls +$ rz-bin -K md5 -S /usr/bin/ls +[Sections] +paddr size vaddr vsize align perm name type flags md5 +------------------------------------------------------------------------------------------------------------------------------ +0x00000000 0x0 ---------- 0x0 0x0 ---- NULL +0x00000318 0x1c 0x00000318 0x1c 0x0 -r-- .interp PROGBITS alloc 91476dafa5ef669483350538fa6ec4cb +0x00000338 0x50 0x00000338 0x50 0x0 -r-- .note.gnu.property NOTE alloc b020406d0153b9a8b093dc5320cf1858 +0x00000388 0x20 0x00000388 0x20 0x0 -r-- .note.ABI-tag NOTE alloc 3ac31b2ebb8a59ed3542fd7de044fbeb +0x000003a8 0x98 0x000003a8 0x98 0x0 -r-- .gnu.hash GNU_HASH alloc 66294f432dce133a6929d846de86169d +0x00000440 0xaf8 0x00000440 0xaf8 0x0 -r-- .dynsym DYNSYM alloc 774725bcfcbbb51079e3fd2973c3aa41 +0x00000f38 0x564 0x00000f38 0x564 0x0 -r-- .dynstr STRTAB alloc 1fe7093dac1a162ed80703082430bc66 +0x0000149c 0xea 0x0000149c 0xea 0x0 -r-- .gnu.version VERSYM alloc b28fd251f91eb3d1be5a1d68a83499b0 +0x00001588 0xe0 0x00001588 0xe0 0x0 -r-- .gnu.version_r VERNEED alloc 443b0d1c61039ec732b43fea288b2e65 +0x00001668 0x150 0x00001668 0x150 0x0 -r-- .rela.dyn RELA alloc dbbe4ca304f452516463a3d53b66410d +0x000017b8 0x948 0x000017b8 0x948 0x0 -r-- .rela.plt RELA alloc,info 0b50c15bd0eea13bd064b74d0854f55a +0x00002100 0x50 0x00002100 0x50 0x0 -r-- .relr.dyn NUM alloc 888d536429b8c32d615ee09e6f48bc8e +0x00003000 0x1b 0x00003000 0x1b 0x0 -r-x .init PROGBITS alloc,execute 34780ae97b075b73ee8ed3c08929bb2d +0x00003020 0x640 0x00003020 0x640 0x0 -r-x .plt PROGBITS alloc,execute 6fb9ff6fe33cf9724ff1881cae5ddc3a +0x00003660 0x40 0x00003660 0x40 0x0 -r-x .plt.got PROGBITS alloc,execute c3f69157c5b164dde89f4d93d01a2fbb +0x000036a0 0x630 0x000036a0 0x630 0x0 -r-x .plt.sec PROGBITS alloc,execute 100ad3792d4e9afca167c23151a4b81d +0x00003cd0 0x14972 0x00003cd0 0x14972 0x0 -r-x .text PROGBITS alloc,execute 90839f5851706a61bb3f87f0fba97be2 +0x00018644 0xd 0x00018644 0xd 0x0 -r-x .fini PROGBITS 
alloc,execute 7f954257b760b556cf4c5678b9b1ba6c +0x00019000 0x5453 0x00019000 0x5453 0x0 -r-- .rodata PROGBITS alloc a6452d25e0a10ac8e59df34eba3237fc +0x0001e454 0x9b4 0x0001e454 0x9b4 0x0 -r-- .eh_frame_hdr PROGBITS alloc ad1d1656aae20f61478eafef5735ad02 +0x0001ee08 0x3348 0x0001ee08 0x3348 0x0 -r-- .eh_frame PROGBITS alloc abb13e58f45411038e69c9ea963cf132 +0x00022f50 0x8 0x00023f50 0x8 0x0 -rw- .init_array INIT_ARRAY write,alloc b7e2f760e1a646fcbdbc90e96e559b98 +0x00022f58 0x8 0x00023f58 0x8 0x0 -rw- .fini_array FINI_ARRAY write,alloc c8fb88b08b2f528a3e4be0ed611f5ceb +0x00022f60 0xaf8 0x00023f60 0xaf8 0x0 -rw- .data.rel.ro PROGBITS write,alloc c269cce7d0d0d50ffefd4cc280e04dda +0x00023a58 0x220 0x00024a58 0x220 0x0 -rw- .dynamic DYNAMIC write,alloc 361d08a3c3a340966033f22e686839c2 +0x00023c78 0x370 0x00024c78 0x370 0x0 -rw- .got PROGBITS write,alloc 7731c689f4ca5fa4d6327707418424ce +0x00024000 0x280 0x00025000 0x280 0x0 -rw- .data PROGBITS write,alloc 408649c72414b451e99cabc3b72cc401 +0x00024280 0x0 0x00025280 0x12d8 0x0 -rw- .bss NOBITS write,alloc +0x00024280 0x105 ---------- 0x105 0x0 ---- .shstrtab STRTAB 87b94c07525325673cb9303007685933 ``` ## Obtaining hashes within rizin session -To calculate a checksum of current block when running rizin, use the `ph` command. Pass an algorithm name to it as a parameter. An example session: +To calculate a checksum of current block when running Rizin, use the `ph` command. Pass an algorithm name to it +as a parameter. An example session: ``` -$ rizin /bin/ls -[0x08049790]> bf entry0 -[0x08049790]> ph md5 -d2994c75adaa58392f953a448de5fba7 +$ rizin /usr/bin/ls +[0x00005880]> bf entry0 +[0x00005880]> ph md5 +6334c2ae05c2421c687f516772b817da ``` You can use all hashing algorithms supported by `rz-hash`: ``` -[0x00000000]> ph? +[0x00000000]> phl algorithm license author -md4 LGPL3 deroad -md5 RSA-MD RSA Data Security, Inc. -sha1 LGPL3 deroad -sha256 BSD-3 Aaron D. Gifford -sha384 BSD-3 Aaron D. Gifford -sha512 BSD-3 Aaron D. 
Gifford +md2 LGPL3 swedenspy +md4 Apache 2.0 OpenSSL Team +md5 Apache 2.0 OpenSSL Team +sha1 Apache 2.0 OpenSSL Team +sha256 Apache 2.0 OpenSSL Team +sha384 Apache 2.0 OpenSSL Team +sha512 Apache 2.0 OpenSSL Team +sm3 Apache 2.0 OpenSSL Team +blake3 CC0 Samuel Neves,Jack O'Connor fletcher8 LGPL3 deroad fletcher16 LGPL3 deroad fletcher32 LGPL3 deroad @@ -115,12 +165,14 @@ crc64iso LGPL3 deroad xor8 LGPL3 deroad xor16 LGPL3 deroad xxhash32 LGPL3 deroad +ssdeep LGPL3 deroad parity LGPL3 deroad entropy LGPL3 deroad entropy_fract LGPL3 deroad ``` -The `ph` command accepts an optional numeric argument to specify length of byte range to be hashed, instead of default block size. For example: +The `ph` command accepts an optional numeric argument to specify length of byte range to be hashed, instead of +default block size. For example: ``` [0x08049A80]> ph md5 32 diff --git a/src/tools/rz-hash/rz-hash_tool.md b/src/tools/rz-hash/rz-hash_tool.md index e01038e8..a7ec9ef9 100644 --- a/src/tools/rz-hash/rz-hash_tool.md +++ b/src/tools/rz-hash/rz-hash_tool.md @@ -5,35 +5,35 @@ The rz-hash tool can be used to calculate checksums and has functions of byte st ``` $ rz-hash -h Usage: rz-hash [-vhBkjLq] [-b S] [-a A] [-c H] [-E A] [-D A] [-s S] [-x S] [-f O] [-t O] [files|-] ... 
- -v Shows version - -h Shows this help page - - Input read from stdin instead from a file - -a algo Hash algorithm to use and you can specify multiple ones by - appending a comma (example: sha1,md4,md5,sha256) - -B Outputs the calculated value for each block - -b size Sets the block size - -c value Compare calculated value with a given one (hexadecimal) - -e endian Sets the endianness (default: 'big' accepted: 'big' or 'little') - -D algo Decrypt the given input; use -S to set key and -I to set IV (if needed) - -E algo Encrypt the given input; use -S to set key and -I to set IV (if needed) - -f from Starts the calculation at given offset - -t to Stops the calculation at given offset - -I iv Sets the initialization vector (IV) - -i times Repeat the calculation N times - -j Outputs the result as a JSON structure - -k Outputs the calculated value using openssh's randomkey algorithm - -L List all algorithms - -q Sets quiet mode (use -qq to get only the calculated value) - -S seed Sets the seed for -a, use '^' to append it before the input, use '@' - prefix to load it from a file and '-' from read it - -K key Sets the hmac key for -a and the key for -E/-D, use '@' prefix to - load it from a file and '-' from read it - from stdin (you can combine them) - -s string Input read from a zero-terminated string instead from a file - -x hex Input read from a hexadecimal value instead from a file - - All the inputs (besides -s/-x/-c) can be hexadecimal or strings - if 's:' prefix is specified + -v Show version information + -h Show this help + - Input read from stdin instead from a file + -a algo Hash algorithm to use and you can specify multiple ones by + Appending a comma (example: sha1,md4,md5,sha256) + -B Output the calculated value for each block + -b size Set the block size + -c value Compare calculated value with a given one (hexadecimal) + -e endian Set the endianness (default: 'big' accepted: 'big' or 'little') + -D algo Decrypt the given input; use -S to set key and -I 
to set IV (if needed) + -E algo Encrypt the given input; use -S to set key and -I to set IV (if needed) + -f from Start the calculation at given offset + -t to Stop the calculation at given offset + -I iv Set the initialization vector (IV) + -i times Repeat the calculation N times + -j Output the result as a JSON structure + -k Output the calculated value using openssh's randomkey algorithm + -L List all algorithms + -q Set quiet mode (use -qq to get only the calculated value) + -S seed Set the seed for -a, use '^' to append it before the input, use '@' + Prefix to load it from a file and '-' from read it + -K key Set the hmac key for -a and the key for -E/-D, use '@' prefix to + Load it from a file and '-' from read it + From stdin (you can combine them) + -s string Input read from a zero-terminated string instead from a file + -x hex Input read from a hexadecimal value instead from a file + + All the input (besides -s/-x/-c) can be hexadecimal or strings + If 's:' prefix is specified ``` To obtain an MD5 hash value of a text string, use the `-s` option: @@ -43,78 +43,83 @@ $ rz-hash -q -a md5 -s 'hello world' string: md5: 5eb63bbbe01eeed093cb22bb8f5acdc3 ``` -It is possible to calculate hash values for contents of files. But do not attempt to do it for very large files because rz-hash buffers the whole input in memory before computing the hash. +It is possible to calculate hash values for contents of files. But do not attempt to do it for very large files +because rz-hash buffers the whole input in memory before computing the hash. 
To apply all algorithms known to rz-hash, use `all` as an algorithm name: ``` -$ rz-hash -a all /bin/ls -/bin/ls: 0x00000000-0x00022a70 md4: 4f34e90ff19613695bfe8ecbddc1ae6d -/bin/ls: 0x00000000-0x00022a70 md5: 27ac5e2f7573020dbff16b3b9c03e678 -/bin/ls: 0x00000000-0x00022a70 sha1: 0ca5dcdf79d00cbc893a5cca29695f2afddc193d -/bin/ls: 0x00000000-0x00022a70 sha256: f8b09fba9fda9ffebae86611261cf628bd71022fb4348d876974f7c48ddcc6d5 -/bin/ls: 0x00000000-0x00022a70 sha384: ae8404125de3ae798fe85635533dc93744136d812fe279eaa92d1f31896ba4bf9fa0e240ab1b3f234b870f243c5754da -/bin/ls: 0x00000000-0x00022a70 sha512: 4ec188a733c402e277f60d59240aba3279b2fdc261ac479188a28aab899b81be4281283988ccbf78f7ca214eb3fbfb49811743ccdb62a459a52414075e9eae8f -/bin/ls: 0x00000000-0x00022a70 fletcher8: 8d -/bin/ls: 0x00000000-0x00022a70 fletcher16: 8d78 -/bin/ls: 0x00000000-0x00022a70 fletcher32: a5e716ad -/bin/ls: 0x00000000-0x00022a70 fletcher64: ae7b1b422fd65611 -/bin/ls: 0x00000000-0x00022a70 adler32: e1028925 -/bin/ls: 0x00000000-0x00022a70 crc8smbus: 9e -/bin/ls: 0x00000000-0x00022a70 crc8cdma2000: 73 -/bin/ls: 0x00000000-0x00022a70 crc8darc: ac -/bin/ls: 0x00000000-0x00022a70 crc8dvbs2: f7 -/bin/ls: 0x00000000-0x00022a70 crc8ebu: 6d -/bin/ls: 0x00000000-0x00022a70 crc8icode: 85 -/bin/ls: 0x00000000-0x00022a70 crc8itu: cb -/bin/ls: 0x00000000-0x00022a70 crc8maxim: 38 -/bin/ls: 0x00000000-0x00022a70 crc8rohc: ef -/bin/ls: 0x00000000-0x00022a70 crc8wcdma: 52 -/bin/ls: 0x00000000-0x00022a70 crc15can: 65ec -/bin/ls: 0x00000000-0x00022a70 crc16: f97a -/bin/ls: 0x00000000-0x00022a70 crc16citt: e73b -/bin/ls: 0x00000000-0x00022a70 crc16usb: 2ed9 -/bin/ls: 0x00000000-0x00022a70 crc16hdlc: d7cd -/bin/ls: 0x00000000-0x00022a70 crc16augccitt: 2366 -/bin/ls: 0x00000000-0x00022a70 crc16buypass: 9eac -/bin/ls: 0x00000000-0x00022a70 crc16cdma2000: d62b -/bin/ls: 0x00000000-0x00022a70 crc16dds110: 02d4 -/bin/ls: 0x00000000-0x00022a70 crc16dectr: 9262 -/bin/ls: 0x00000000-0x00022a70 crc16dectx: 9263 -/bin/ls: 
0x00000000-0x00022a70 crc16dnp: d64b -/bin/ls: 0x00000000-0x00022a70 crc16en13757: 9227 -/bin/ls: 0x00000000-0x00022a70 crc16genibus: 18c4 -/bin/ls: 0x00000000-0x00022a70 crc16maxim: 0685 -/bin/ls: 0x00000000-0x00022a70 crc16mcrf4xx: 2832 -/bin/ls: 0x00000000-0x00022a70 crc16riello: 0e39 -/bin/ls: 0x00000000-0x00022a70 crc16t10dif: dbb5 -/bin/ls: 0x00000000-0x00022a70 crc16teledisk: 4fee -/bin/ls: 0x00000000-0x00022a70 crc16tms37157: ba7d -/bin/ls: 0x00000000-0x00022a70 crca: a525 -/bin/ls: 0x00000000-0x00022a70 crc16kermit: b131 -/bin/ls: 0x00000000-0x00022a70 crc16modbus: d126 -/bin/ls: 0x00000000-0x00022a70 crc16x25: d7cd -/bin/ls: 0x00000000-0x00022a70 crc16xmodem: 27a2 -/bin/ls: 0x00000000-0x00022a70 crc24: 007476f5 -/bin/ls: 0x00000000-0x00022a70 crc32: 09ad52f8 -/bin/ls: 0x00000000-0x00022a70 crc32ecma267: 4da033c1 -/bin/ls: 0x00000000-0x00022a70 crc32c: ad8aa54c -/bin/ls: 0x00000000-0x00022a70 crc32bzip2: 2db14275 -/bin/ls: 0x00000000-0x00022a70 crc32d: 1a82c6fe -/bin/ls: 0x00000000-0x00022a70 crc32mpeg2: d24ebd8a -/bin/ls: 0x00000000-0x00022a70 crc32posix: 58bb93aa -/bin/ls: 0x00000000-0x00022a70 crc32q: fa075365 -/bin/ls: 0x00000000-0x00022a70 crc32jamcrc: f652ad07 -/bin/ls: 0x00000000-0x00022a70 crc32xfer: bd66a285 -/bin/ls: 0x00000000-0x00022a70 crc64: 02bfeb9d3cc5ba89 -/bin/ls: 0x00000000-0x00022a70 crc64ecma182: 02bfeb9d3cc5ba89 -/bin/ls: 0x00000000-0x00022a70 crc64we: 45f4fd1aca1b6d00 -/bin/ls: 0x00000000-0x00022a70 crc64xz: 7ad92fbc2cb7bbaa -/bin/ls: 0x00000000-0x00022a70 crc64iso: 69770b2efe4f8aae -/bin/ls: 0x00000000-0x00022a70 xor8: 92 -/bin/ls: 0x00000000-0x00022a70 xor16: 594c -/bin/ls: 0x00000000-0x00022a70 xxhash32: 95374b80 -/bin/ls: 0x00000000-0x00022a70 parity: 01000000 -/bin/ls: 0x00000000-0x00022a70 entropy: 5.84008688 -/bin/ls: 0x00000000-0x00022a70 entropy_fract: 0.73001086 +$ rz-hash -a all /usr/bin/ls +/usr/bin/ls: 0x00000000-0x00024ac8 md2: 9a2a86a52e9cb44b2e06a58a00fee15e +/usr/bin/ls: 0x00000000-0x00024ac8 md4: 
725fc3498847e96d031ce4d1f4872b28 +/usr/bin/ls: 0x00000000-0x00024ac8 md5: bcf16aef7487e6ea478a168c180c07fa +/usr/bin/ls: 0x00000000-0x00024ac8 sha1: ccb226a119fe301b2ad2dc8a7013faf6f6296ea1 +/usr/bin/ls: 0x00000000-0x00024ac8 sha256: 1ba437f3522d9e416f66425fdb816dfbaf32b4140d2092f2b8922b2825e4065a +/usr/bin/ls: 0x00000000-0x00024ac8 sha384: fd51260b6db6940f37a3177b033e1a7e9c1cedd3ee5c9c45e3dfdac135a65edf991313da7083551f091ef553e2f4fe29 +/usr/bin/ls: 0x00000000-0x00024ac8 sha512: de26d5385088825cce3fcb87645ddf2148e4c71013ff98170ccd106795d6cf4669e1b71053593472667ff64f17a0190ab3d99430d784a0489c7bc6344d5605db +/usr/bin/ls: 0x00000000-0x00024ac8 sm3: 9410230b34c9ab0c527c21cbdb9f953cc747479c89d019a4f6ae4c7cffd40bb0 +/usr/bin/ls: 0x00000000-0x00024ac8 blake3: 5805f287d8c0107ffdc7960b6112b586940d9767e888023048543daefa4cf6e5 +/usr/bin/ls: 0x00000000-0x00024ac8 fletcher8: dd +/usr/bin/ls: 0x00000000-0x00024ac8 fletcher16: ddf3 +/usr/bin/ls: 0x00000000-0x00024ac8 fletcher32: dd003c1a +/usr/bin/ls: 0x00000000-0x00024ac8 fletcher64: 5952f47d393a5468 +/usr/bin/ls: 0x00000000-0x00024ac8 adler32: 86e2c3a1 +/usr/bin/ls: 0x00000000-0x00024ac8 crc8smbus: 15 +/usr/bin/ls: 0x00000000-0x00024ac8 crc8cdma2000: d9 +/usr/bin/ls: 0x00000000-0x00024ac8 crc8darc: 3b +/usr/bin/ls: 0x00000000-0x00024ac8 crc8dvbs2: 3d +/usr/bin/ls: 0x00000000-0x00024ac8 crc8ebu: 67 +/usr/bin/ls: 0x00000000-0x00024ac8 crc8icode: 67 +/usr/bin/ls: 0x00000000-0x00024ac8 crc8itu: 40 +/usr/bin/ls: 0x00000000-0x00024ac8 crc8maxim: b3 +/usr/bin/ls: 0x00000000-0x00024ac8 crc8rohc: ad +/usr/bin/ls: 0x00000000-0x00024ac8 crc8wcdma: ab +/usr/bin/ls: 0x00000000-0x00024ac8 crc15can: 7126 +/usr/bin/ls: 0x00000000-0x00024ac8 crc16: f310 +/usr/bin/ls: 0x00000000-0x00024ac8 crc16citt: be97 +/usr/bin/ls: 0x00000000-0x00024ac8 crc16usb: 9f67 +/usr/bin/ls: 0x00000000-0x00024ac8 crc16hdlc: 282a +/usr/bin/ls: 0x00000000-0x00024ac8 crc16augccitt: 5c10 +/usr/bin/ls: 0x00000000-0x00024ac8 crc16buypass: fc1c +/usr/bin/ls: 
0x00000000-0x00024ac8 crc16cdma2000: d203 +/usr/bin/ls: 0x00000000-0x00024ac8 crc16dds110: 98aa +/usr/bin/ls: 0x00000000-0x00024ac8 crc16dectr: 0485 +/usr/bin/ls: 0x00000000-0x00024ac8 crc16dectx: 0484 +/usr/bin/ls: 0x00000000-0x00024ac8 crc16dnp: d2fd +/usr/bin/ls: 0x00000000-0x00024ac8 crc16en13757: f12e +/usr/bin/ls: 0x00000000-0x00024ac8 crc16genibus: 4168 +/usr/bin/ls: 0x00000000-0x00024ac8 crc16maxim: 0cef +/usr/bin/ls: 0x00000000-0x00024ac8 crc16mcrf4xx: d7d5 +/usr/bin/ls: 0x00000000-0x00024ac8 crc16riello: ecaa +/usr/bin/ls: 0x00000000-0x00024ac8 crc16t10dif: faeb +/usr/bin/ls: 0x00000000-0x00024ac8 crc16teledisk: bee3 +/usr/bin/ls: 0x00000000-0x00024ac8 crc16tms37157: 60a7 +/usr/bin/ls: 0x00000000-0x00024ac8 crca: e518 +/usr/bin/ls: 0x00000000-0x00024ac8 crc16kermit: 0dcc +/usr/bin/ls: 0x00000000-0x00024ac8 crc16modbus: 6098 +/usr/bin/ls: 0x00000000-0x00024ac8 crc16x25: 282a +/usr/bin/ls: 0x00000000-0x00024ac8 crc16xmodem: 26cc +/usr/bin/ls: 0x00000000-0x00024ac8 crc24: 0078edb6 +/usr/bin/ls: 0x00000000-0x00024ac8 crc32: ea7111a0 +/usr/bin/ls: 0x00000000-0x00024ac8 crc32ecma267: d7f2cb8a +/usr/bin/ls: 0x00000000-0x00024ac8 crc32c: bb2af410 +/usr/bin/ls: 0x00000000-0x00024ac8 crc32bzip2: b307ddba +/usr/bin/ls: 0x00000000-0x00024ac8 crc32d: 5de137a9 +/usr/bin/ls: 0x00000000-0x00024ac8 crc32mpeg2: 4cf82245 +/usr/bin/ls: 0x00000000-0x00024ac8 crc32posix: 35f6bff2 +/usr/bin/ls: 0x00000000-0x00024ac8 crc32q: 54013d42 +/usr/bin/ls: 0x00000000-0x00024ac8 crc32jamcrc: 158eee5f +/usr/bin/ls: 0x00000000-0x00024ac8 crc32xfer: 7d03019b +/usr/bin/ls: 0x00000000-0x00024ac8 crc64: 60f233ef3032deac +/usr/bin/ls: 0x00000000-0x00024ac8 crc64ecma182: 60f233ef3032deac +/usr/bin/ls: 0x00000000-0x00024ac8 crc64we: c52bf0d6179e06ef +/usr/bin/ls: 0x00000000-0x00024ac8 crc64xz: eab2da06f0ff2682 +/usr/bin/ls: 0x00000000-0x00024ac8 crc64iso: 8734628b8dc355f3 +/usr/bin/ls: 0x00000000-0x00024ac8 xor8: fd +/usr/bin/ls: 0x00000000-0x00024ac8 xor16: 67ec +/usr/bin/ls: 
0x00000000-0x00024ac8 xxhash32: 8a1da333 +/usr/bin/ls: 0x00000000-0x00024ac8 ssdeep: 3072:HANnWPSHOTx1XPglbUXVoaJzbuWXod7QHwOP1hKYfrMJM:HABAIi162VjJzCpJQHwWhHr0 +/usr/bin/ls: 0x00000000-0x00024ac8 parity: 01 +/usr/bin/ls: 0x00000000-0x00024ac8 entropy: 5.95354820 +/usr/bin/ls: 0x00000000-0x00024ac8 entropy_fract: 0.74419352 ``` diff --git a/src/tools/rz-pm/intro.md b/src/tools/rz-pm/intro.md index bb4617c3..f7727075 100644 --- a/src/tools/rz-pm/intro.md +++ b/src/tools/rz-pm/intro.md @@ -1,7 +1,10 @@ ## Rizin package manager Rizin has its own package manager for managing external plugins (mainly from -[rizin-extras](https://github.com/rizinorg/rizin-extras)). As most of the package managers it allows you to install, remove or update packages (plugins). There is one restriction though - currently it supports only the compilation from sources, so providing the toolchan, necessary build tools and dependencies for each plugin is the user responsibility. It might change in the future. +[rizin-extras](https://github.com/rizinorg/rizin-extras)). Like most package managers, it allows you to install, +remove or update packages (plugins). There is one restriction though - currently it supports only the compilation +from sources, so providing the toolchain, necessary build tools and dependencies for each plugin is the user's +responsibility. It might change in the future. ``` $ rz-pm -h @@ -35,6 +38,7 @@ Already up to date. ``` There are many commands available now: + ``` rz-pm -h Usage: rz-pm [init|update|cmd] [...] diff --git a/src/tools/rz-run/intro.md b/src/tools/rz-run/intro.md index 5485026b..9cf04886 100644 --- a/src/tools/rz-run/intro.md +++ b/src/tools/rz-run/intro.md @@ -6,12 +6,18 @@ a binary for debugging. ``` $ rz-run -h -Usage: rz-run -v|-t|script.rrz [directive ..] 
+Usage: [directives] [script.rz] [--] [program] [args] + -h Show this help + -l Show profile options + -t Output template profile + -v Show version information + -w Wait for incoming terminal process + -- [program] [args] Run commands ``` It takes the text file in key=value format to specify the execution environment. Rz-run can be used as both separate tool or as a part of rizin. -To load the rz-run profile in rizin you need to use either `-r` to load +To load the rz-run profile in Rizin you need to use either `-r` to load the profile from file or `-R` to specify the directive from string. The format of the profile is very simple. Note the most important keys - `program` and `arg*` diff --git a/src/tools/rz-sign/intro.md b/src/tools/rz-sign/intro.md index 95adb730..ad66e4e7 100644 --- a/src/tools/rz-sign/intro.md +++ b/src/tools/rz-sign/intro.md @@ -1,6 +1,7 @@ ## Rasign2 -The `rz-sign` tool allows you to quickly create signature files. To create a SDB signature file named `libc_sigs2.sdb` for the `libc.so.6` binary, simply run: +The `rz-sign` tool allows you to quickly create signature files. To create an SDB signature file named +`libc_sigs2.sdb` for the `libc.so.6` binary, simply run: ``` $ rz-sign -o libc_sigs2.sdb libc.so.6 @@ -19,7 +20,8 @@ generated zignatures: 2870 [0x00024330]> zos libc_sigs.sdb # write zignatures to a file ``` -The '-a' flag can be added to increase the amount of analysis performed. This will result in more functions discovered and more zignatures created. +The `-a` flag can be added to increase the amount of analysis performed. This will result in more functions discovered +and more zignatures created. ``` $ rz-sign -a -o /tmp/libc_sigs2.sdb libc.so.6 @@ -36,7 +38,8 @@ generated zignatures: 2955 ### Output types -There are 3 different output methods. Using `-o` to create an SDB is shown above. The `-r` flag will print the discovered signatures to stdout as rizin commands. +There are 3 different output methods. 
Using `-o` to create an SDB is shown above. The `-r` flag will print +the discovered signatures to stdout as Rizin commands. ``` $ rz-sign -r hello_world |grep main diff --git a/src/visual_mode/intro.md b/src/visual_mode/intro.md index e319f578..09946a2e 100644 --- a/src/visual_mode/intro.md +++ b/src/visual_mode/intro.md @@ -1,6 +1,7 @@ # Visual Mode -The visual mode is a more user-friendly interface alternative to rizin's command-line prompt. It allows easy navigation, has a cursor mode for selecting bytes, and offers numerous key bindings to simplify debugger use. +The visual mode is a more user-friendly interface alternative to Rizin's command-line prompt. It allows easy navigation, +has a cursor mode for selecting bytes, and offers numerous key bindings to simplify debugger use. To enter visual mode, use `V` command. To exit from it back to command line, press `q`. ## Navigation @@ -11,11 +12,12 @@ example `5j` will move down for 5 lines, or `2l` will move 2 characters right. ![Visual Mode](visualmode.png) -## print modes aka panels +## Print modes aka panels -The Visual mode uses "print modes" which are basically different panel that you can rotate. By default those are: +The Visual mode uses "print modes" which are basically different panels that you can rotate through. By default, those are: -↻ **Hexdump panel** -> **Disassembly panel** → **Debugger panel** → **Hexadecimal words dump panel** → **Hex-less hexdump panel** → **Op analysis color map panel** → **Annotated hexdump panel** ↺. +↻ **Hexdump panel** → **Disassembly panel** → **Debugger panel** → **Hexadecimal words dump panel** → +**Hex-less hexdump panel** → **Op analysis color map panel** → **Annotated hexdump panel** ↺. 
Notice that the top of the panel contains the command which is used, for example for the disassembly panel: diff --git a/src/visual_mode/visual_configuration_editor.md b/src/visual_mode/visual_configuration_editor.md index a4abec5d..459ce4c9 100644 --- a/src/visual_mode/visual_configuration_editor.md +++ b/src/visual_mode/visual_configuration_editor.md @@ -1,7 +1,8 @@ # Visual Configuration Editor -`Ve` or `e` in visual mode allows you to edit rizin configuration visually. -For example, if you want to change the assembly display just select `asm` in the list and choose your assembly display flavor. +`Ve` or `e` in visual mode allows you to edit the Rizin configuration visually. +For example, if you want to change the assembly display, just select `asm` in the list and choose your assembly +display flavor. ![First Select asm](select_asm.png) diff --git a/src/visual_mode/visual_disassembly.md b/src/visual_mode/visual_disassembly.md index f850329e..2424ddfc 100644 --- a/src/visual_mode/visual_disassembly.md +++ b/src/visual_mode/visual_disassembly.md @@ -2,8 +2,11 @@ ## Navigation -Move within the Disassembly using arrow keys or `hjkl`. Use `g` to seek directly to a flag or an offset, type it when requested by the prompt: `[offset]>`. -Follow a jump or a call using the `number` of your keyboard `[0-9]` and the number on the right in disassembly to follow a call or a jump. In this example typing `1` on the keyboard would follow the call to `sym.imp.__libc_start_main` and therefore, seek at the offset of this symbol. +Move within the Disassembly using arrow keys or `hjkl`. Use `g` to seek directly to a flag or an offset; type it +when requested by the prompt: `[offset]>`. +To follow a jump or a call, press the number key `[0-9]` on your keyboard that matches the number shown on the right +of the disassembly line. In this example, typing `1` on the keyboard would follow the call to +`sym.imp.__libc_start_main` and therefore seek to the offset of this symbol. 
``` 0x00404894 e857dcffff call sym.imp.__libc_start_main ;[1] @@ -13,7 +16,8 @@ Seek back to the previous location using `u`, `U` will allow you to redo the see ## `d` as define -`d` can be used to change the type of data of the current block, several basic types/structures are available as well as more advanced one using `pf` template: +`d` can be used to change the type of data of the current block. Several basic types/structures are available, +as well as more advanced ones using `pf` templates: ``` d → ... @@ -28,7 +32,10 @@ d → d ... ``` -To improve code readability you can change how rizin presents numerical values in disassembly, by default most of disassembly display numerical value as hexadecimal. Sometimes you would like to view it as a decimal, binary or even custom defined constant. To change value format you can use `d` following by `i` then choose what base to work in, this is the equivalent to `ahi`: +To improve code readability you can change how Rizin presents numerical values in disassembly. By default, most +disassembly displays numerical values as hexadecimal. Sometimes you would like to view a value as a decimal, binary or even +custom defined constant. To change the value format you can use `d` followed by `i`, then choose what base to work in; +this is the equivalent of `ahi`: ``` d → i → ... @@ -41,34 +48,44 @@ d → i → 2 ### Usage of the Cursor for Inserting/Patching... -Remember that, to be able to actually edit files loaded in rizin, you have to start it with the `-w` option. Otherwise a file is opened in read-only mode. +Remember that, to be able to actually edit files loaded in Rizin, you have to start it with the `-w` option. +Otherwise, a file is opened in read-only mode. -Pressing lowercase `c` toggles the cursor mode. When this mode is active, the currently selected byte (or byte range) is highlighted. +Pressing lowercase `c` toggles the cursor mode. When this mode is active, the currently selected byte (or byte range) +is highlighted. 
![Cursor at 0x00404896](cursor.png) -The cursor is used to select a range of bytes or simply to point to a byte. You can use the cursor to create a named flag at specific location. To do so, seek to the required position, then press `f` and enter a name for a flag. -If the file was opened in write mode using the `-w` flag or the `o+` command, you can also use the cursor to overwrite a selected range with new values. To do so, select a range of bytes (with HJKL and SHIFT key pressed), then press `i` and enter the hexpair values for the new data. The data will be repeated as needed to fill the range selected. For example: +The cursor is used to select a range of bytes or simply to point to a byte. You can use the cursor to create +a named flag at a specific location. To do so, seek to the required position, then press `f` and enter a name for the flag. +If the file was opened in write mode using the `-w` flag or the `o+` command, you can also use the cursor to overwrite +a selected range with new values. To do so, select a range of bytes (with HJKL and SHIFT key pressed), then press `i` +and enter the hexpair values for the new data. The data will be repeated as needed to fill the range selected. +For example: + ``` 