Freeing memory with C that was initialized by Rust

Background

This past weeks I’ve been working on a Rust PAM client to authenticate a password. Although there are some crates out there that do this for you, I thought I could do one myself and interact with PAM directly through FFI. This means creating the Rust bindings with bindgen using pam_appl.h (found in /usr/include/security/pam_appl.h in Debian), and then reading PAM’s documentation to understand how to do the auth dance.

One of those dance steps involves understanding how PAM expects pam_conv. This is a function called by PAM modules when your client calls (for example) pam_authenticate. So you create this conv function and send it to pam_start, it returns a blind struct pam_handle_t which the caller needs to pass over through all the PAM functions it calls (this struct contains the function). Later when the client calls pam_authenticate, PAM will retrieve the conversation function from pam_handle_t and call it with an array of pam_message. It’ll also initialize a pointer to an array of pam_response and send it over for the client to fill in and reply back. PAM will later free the memory in this response array.

So an FFI between a Rust conversation function and C PAM module means there’s a malloc and free dance that might end in disaster. Rust has a robust way of allocating/deallocating memory while C has a more simpler way.

C and memory management

This is a (very quick) summary of how C manages memory on any given program.

Consider the following program:

#include <stdio.h>

int main() {
    int a = 1;

    printf("The value of a=%d\n", a);
    return 0;
}

This program creates a variable a with the value 1 and prints it using printf. Because the value of this variable is known at compile time there’s no need for runtime allocations, the compiler places it in the stack:

$ gdb stack_test
...
(gdb) b main
Breakpoint 1 at 0x1155: file stack_test.c, line 5.
(gdb) r
Starting program: /home/nicolas/stack_test 
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".

Breakpoint 1, main () at stack_test.c:5
5           int a = 1;
(gdb) disassemble /s main
Dump of assembler code for function main:
stack_test.c:
4       int main() {
   0x0000555555555149 <+0>:     endbr64
   0x000055555555514d <+4>:     push   %rbp
   0x000055555555514e <+5>:     mov    %rsp,%rbp
   0x0000555555555151 <+8>:     sub    $0x10,%rsp

5           int a = 1;
=> 0x0000555555555155 <+12>:    movl   $0x1,-0x4(%rbp)

6
7           printf("The value of a=%d\n", a);
   0x000055555555515c <+19>:    mov    -0x4(%rbp),%eax
   0x000055555555515f <+22>:    mov    %eax,%esi
   0x0000555555555161 <+24>:    lea    0xe9c(%rip),%rax        # 0x555555556004
   0x0000555555555168 <+31>:    mov    %rax,%rdi
   0x000055555555516b <+34>:    mov    $0x0,%eax
   0x0000555555555170 <+39>:    call   0x555555555050 <printf@plt>

8           return 0;
   0x0000555555555175 <+44>:    mov    $0x0,%eax

9       }
   0x000055555555517a <+49>:    leave
   0x000055555555517b <+50>:    ret
End of assembler dump.

The instruction movl $0x1,-0x4(%rbp) means “move the value 1 into the contents of %rbp (the stack base pointer)”. Because we’re dealing with int in my machine that’s a 32-bit value (4 bytes), so it shifts 4 bytes to store the contents (and uses movl instead of mov).

(gdb) s
6           printf("The value of a=%d\n", a);
(gdb) info registers rsp rbp
rsp            0x7fffffffe280      0x7fffffffe280
rbp            0x7fffffffe290      0x7fffffffe290
(gdb) x/2gx $rsp
0x7fffffffe280: 0x00007fffffffe370      0x00000001ffffe3b8

And there you go, 0x00000001ffffe3b8 has 0x00000001 in its upper half, the value of a.

Let’s see how C manages the heap, consider the following program:

#include <stdio.h>
#include <stdlib.h>

int main() {
    int *array;
    int len = 5;

    array = (int *) malloc(len * sizeof(int));

    for (int i = 0; i < len; i++) {
        array[i] = i;
        printf("array[%d] = %d\n", i, array[i]);
    }

    free(array);
    return 0;
}

The disassembly of the binary is as follows:

$ gdb free_test 
...
(gdb) b main
Breakpoint 1 at 0x1195: file free_test.c, line 6.
(gdb) r
Starting program: /home/nicolas/free_test 
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".

Breakpoint 1, main () at free_test.c:6
6           int len = 5;
(gdb) disassemble /s main
Dump of assembler code for function main:
free_test.c:
4       int main() {
   0x0000555555555189 <+0>:     endbr64
   0x000055555555518d <+4>:     push   %rbp
   0x000055555555518e <+5>:     mov    %rsp,%rbp
   0x0000555555555191 <+8>:     sub    $0x10,%rsp

5           int *array;
6           int len = 5;
=> 0x0000555555555195 <+12>:    movl   $0x5,-0xc(%rbp)

7
8           array = (int *) malloc(len * sizeof(int));
   0x000055555555519c <+19>:    mov    -0xc(%rbp),%eax
   0x000055555555519f <+22>:    cltq
   0x00005555555551a1 <+24>:    shl    $0x2,%rax
   0x00005555555551a5 <+28>:    mov    %rax,%rdi
   0x00005555555551a8 <+31>:    call   0x555555555090 <malloc@plt>
   0x00005555555551ad <+36>:    mov    %rax,-0x8(%rbp)

9
10          for (int i = 0; i < len; i++) {
   0x00005555555551b1 <+40>:    movl   $0x0,-0x10(%rbp)
   0x00005555555551b8 <+47>:    jmp    0x555555555206 <main+125>

11              array[i] = i;
   0x00005555555551ba <+49>:    mov    -0x10(%rbp),%eax
   0x00005555555551bd <+52>:    cltq
   0x00005555555551bf <+54>:    lea    0x0(,%rax,4),%rdx
   0x00005555555551c7 <+62>:    mov    -0x8(%rbp),%rax
   0x00005555555551cb <+66>:    add    %rax,%rdx
   0x00005555555551ce <+69>:    mov    -0x10(%rbp),%eax
   0x00005555555551d1 <+72>:    mov    %eax,(%rdx)

12              printf("array[%d] = %d\n", i, array[i]);
   0x00005555555551d3 <+74>:    mov    -0x10(%rbp),%eax
   0x00005555555551d6 <+77>:    cltq
   0x00005555555551d8 <+79>:    lea    0x0(,%rax,4),%rdx
   0x00005555555551e0 <+87>:    mov    -0x8(%rbp),%rax
   0x00005555555551e4 <+91>:    add    %rdx,%rax
   0x00005555555551e7 <+94>:    mov    (%rax),%edx
   0x00005555555551e9 <+96>:    mov    -0x10(%rbp),%eax
   0x00005555555551ec <+99>:    mov    %eax,%esi
   0x00005555555551ee <+101>:   lea    0xe0f(%rip),%rax        # 0x555555556004
   0x00005555555551f5 <+108>:   mov    %rax,%rdi
   0x00005555555551f8 <+111>:   mov    $0x0,%eax
   0x00005555555551fd <+116>:   call   0x555555555080 <printf@plt>

10          for (int i = 0; i < len; i++) {
   0x0000555555555202 <+121>:   addl   $0x1,-0x10(%rbp)
   0x0000555555555206 <+125>:   mov    -0x10(%rbp),%eax
   0x0000555555555209 <+128>:   cmp    -0xc(%rbp),%eax
   0x000055555555520c <+131>:   jl     0x5555555551ba <main+49>

13          }
14
15          free(array);
   0x000055555555520e <+133>:   mov    -0x8(%rbp),%rax
   0x0000555555555212 <+137>:   mov    %rax,%rdi
   0x0000555555555215 <+140>:   call   0x555555555070 <free@plt>

16          return 0;
   0x000055555555521a <+145>:   mov    $0x0,%eax

17      }
   0x000055555555521f <+150>:   leave
   0x0000555555555220 <+151>:   ret
End of assembler dump.

Similarly as before it used the stack to save the contents of len and array with movl and mov respectively. Let’s see how that looks like in the stack:

(gdb) stepi
8           array = (int *) malloc(len * sizeof(int));
(gdb) info registers rsp rbp
rsp            0x7fffffffe280      0x7fffffffe280
rbp            0x7fffffffe290      0x7fffffffe290
(gdb) x/2gx $rsp
0x7fffffffe280: 0x00000005ffffe370      0x00007fffffffe3b8
(gdb) 

It executed movl $0x5,-0xc(%rbp): “Move the 32 bit data 0x5 into %rbp shifted 0xc bytes”. We can see the value at the upper half of the 64 bit data: 0x00000005ffffe370. Different than before where it only shifted 0x4, because it now made space for the 64 bit address that array is going to have. The following instructions will assign this memory using malloc:

0x000055555555519c <+19>:    mov    -0xc(%rbp),%eax
0x000055555555519f <+22>:    cltq
0x00005555555551a1 <+24>:    shl    $0x2,%rax
0x00005555555551a5 <+28>:    mov    %rax,%rdi
0x00005555555551a8 <+31>:    call   0x555555555090 <malloc@plt>
0x00005555555551ad <+36>:    mov    %rax,-0x8(%rbp)

The instructions before the call to malloc are meaningful for malloc only, it’s preparing all the input the function needs. We should only focus on the return that malloc gave us in %rax, this is the value of array and will be placed to the stack accordingly: mov %rax,-0x8(%rbp). Let’s run this code and inspect the stack:

(gdb) advance 10
main () at free_test.c:10
10          for (int i = 0; i < len; i++) {
(gdb) x/2gx $rsp
0x7fffffffe280: 0x00000005ffffe370      0x00005555555592a0
(gdb) x/1gx 0x00005555555592a0
0x5555555592a0: 0x0000000000000000
(gdb) 

advance 10 just means to move to line 10 in the code, handy gdb function when debugging symbols ar available. This effectively triggers the malloc code. Now the stack contains a new value: 0x00005555555592a0, this is the result of mov %rax,-0x8(%rbp) (store the result of malloc to %rbp shifted 8 bytes). The value 0x00005555555592a0 is the address of array, and the OS promised that at that address + 5 bytes belong to this program. We can explore the contents of the memory (now the heap):

(gdb) x/3gx 0x00005555555592a0
0x5555555592a0: 0x0000000000000000      0x0000000000000000
0x5555555592b0: 0x0000000000000000

Because int default to 32 bit in my machine, malloc allocated 5 32 bit spaces, represented here by 64 bit chunks. Let’s keep running the code to see how they fill up.

(gdb) advance 15
array[0] = 0
array[1] = 1
array[2] = 2
array[3] = 3
array[4] = 4
main () at free_test.c:15
15          free(array);
(gdb) info registers rsp rbp
rsp            0x7fffffffe280      0x7fffffffe280
rbp            0x7fffffffe290      0x7fffffffe290
(gdb) x/2gx $rsp
0x7fffffffe280: 0x0000000500000005      0x00005555555592a0
(gdb) x/3gx 0x00005555555592a0
0x5555555592a0: 0x0000000100000000      0x0000000300000002
0x5555555592b0: 0x0000000000000004
(gdb) 

Don’t worry about the code in the for loop, although we can dive into it and explain bit by bit, it’s not the focus of this post. The important thing is that the resulting numbers were placed in the heap at 0x00005555555592a0 (the address that malloc gave us) and 5 32 bit spaces after.

Then we call free() to tell the OS that we don’t want that memory anymore.

(gdb) advance 16
main () at free_test.c:16
16          return 0;
(gdb) info registers rsp rbp
rsp            0x7fffffffe280      0x7fffffffe280
rbp            0x7fffffffe290      0x7fffffffe290
(gdb) x/2gx $rsp
0x7fffffffe280: 0x0000000500000005      0x00005555555592a0
(gdb) x/3gx 0x00005555555592a0
0x5555555592a0: 0x0000000555555559      0x86c25f8360a4c502
0x5555555592b0: 0x0000000000000004
(gdb) 

free() will remove the assignment it made for the memory we requested, but will not alter the data that exists there. Although we can see a 0x00000004 in there from our code, it’s not guaranteed to exist after free() (for example, the other numbers were replaced by other things).

In C you need to make sure to alloc and dealloc memory as you use it, this is why many modern langauges like Go or Rust have memory management included for you by default (meaning memory assigned to variables in the heap will be freed automatically when they are no longer used).

Rust and memory management

The C examples above did not use a garbace collector. Modern langagues like Java or Go have a runtime program called “Gabage collector” (abbreviated GC), this takes care of memory management for us so we don’t have to worry about pairing malloc to free in our code.

Rust doesn’t have a GC, it automatically returns the memory after leaving out of scope. It does this by calling the method drop in the Drop trait for the object.

Let’s see this with an example, let’s start with a simple example:

fn main() {
    let a = 1;

    println!("The value of a={}", a);
}

Because rustc (Rust’s compiler) knows the size and value of a at compile time, it places the variable in the stack:

$ gdb rust_stack_memory 
...
(gdb) start
Temporary breakpoint 1 at 0x15354: file rust_stack_memory.rs, line 2.
Starting program: /home/nicolas/rust_stack_memory 
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".

Temporary breakpoint 1, rust_stack_memory::main () at rust_stack_memory.rs:2
2           let a = 1;
(gdb) disassemble /s
Dump of assembler code for function _ZN17rust_stack_memory4main17hd3a3de1a38df406aE:
rust_stack_memory.rs:
1       fn main() {
   0x0000555555569350 <+0>:     sub    $0x58,%rsp

2           let a = 1;
=> 0x0000555555569354 <+4>:     movl   $0x1,0x4(%rsp)

3
4           println!("The value of a={}", a);
   0x000055555556935c <+12>:    lea    0x48(%rsp),%rdi
   0x0000555555569361 <+17>:    lea    0x4(%rsp),%rsi
   0x0000555555569366 <+22>:    call   0x555555569480 <_ZN4core3fmt2rt8Argument11new_display17h7e24be30af2fb36bE>
   0x000055555556936b <+27>:    mov    0x48(%rsp),%rax
   0x0000555555569370 <+32>:    mov    %rax,0x38(%rsp)
   0x0000555555569375 <+37>:    mov    0x50(%rsp),%rax
   0x000055555556937a <+42>:    mov    %rax,0x40(%rsp)
   0x000055555556937f <+47>:    lea    0x8(%rsp),%rdi
   0x0000555555569384 <+52>:    lea    0x413cd(%rip),%rsi        # 0x5555555aa758
   0x000055555556938b <+59>:    lea    0x38(%rsp),%rdx
   0x0000555555569390 <+64>:    call   0x555555569440 <_ZN4core3fmt2rt38_$LT$impl$u20$core..fmt..Arguments$GT$6new_v117hcff756169ed10857E>
   0x0000555555569395 <+69>:    lea    0x8(%rsp),%rdi
   0x000055555556939a <+74>:    call   *0x43f20(%rip)        # 0x5555555ad2c0

5       }
   0x00005555555693a0 <+80>:    add    $0x58,%rsp
   0x00005555555693a4 <+84>:    ret
End of assembler dump.
(gdb) 

Inspecting the stack:

(gdb) stepi
4           println!("The value of a={}", a);
(gdb) info registers rsp rbp
rsp            0x7fffffffe020      0x7fffffffe020
rbp            0x7fffffffe210      0x7fffffffe210
(gdb) x/1gx $rsp
0x7fffffffe020: 0x0000000100000000
(gdb) 

We find the value was pushed to the stack in %rsp, so far this is the same behaviour as C. Let’s see an example with the heap:

fn main() {
    let a = String::from("Hello World!");
    println!("a={}", a);
}

The String::from method creates a String object that can grow, so this must be done in the heap. We can see what it does by disassembling the binary:

$ gdb rust_heap 
...
(gdb) start
Temporary breakpoint 1 at 0x16a77: file rust_heap.rs, line 2.
Starting program: /home/nicolas/rust_heap 

[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".

Temporary breakpoint 1, rust_heap::main () at rust_heap.rs:2
2           let a = String::from("Hello World!");
(gdb) disassemble /s
Dump of assembler code for function _ZN9rust_heap4main17h9468d83c908b7b2bE:
rust_heap.rs:
1       fn main() {
   0x000055555556aa70 <+0>:     sub    $0x88,%rsp

2           let a = String::from("Hello World!");
=> 0x000055555556aa77 <+7>:     lea    -0xca43(%rip),%rsi        # 0x55555555e03b
   0x000055555556aa7e <+14>:    lea    0x8(%rsp),%rdi
   0x000055555556aa83 <+19>:    mov    %rdi,(%rsp)
   0x000055555556aa87 <+23>:    mov    $0xc,%edx
   0x000055555556aa8c <+28>:    call   0x55555556a8b0 <_ZN76_$LT$alloc..string..String$u20$as$u20$core..convert..From$LT$$RF$str$GT$$GT$4from17hd60d7e630ec9d734E>
   0x000055555556aa91 <+33>:    mov    (%rsp),%rsi
   0x000055555556aa95 <+37>:    lea    0x68(%rsp),%rdi

3           println!("a={}", a);
   0x000055555556aa9a <+42>:    call   0x555555569c30 <_ZN4core3fmt2rt8Argument11new_display17he8b03bd6e4c77b4cE>
   0x000055555556aa9f <+47>:    jmp    0x55555556aac0 <_ZN9rust_heap4main17h9468d83c908b7b2bE+80>
   0x000055555556aaa1 <+49>:    lea    0x8(%rsp),%rdi

4       }
   0x000055555556aaa6 <+54>:    call   0x55555556a1a0 <_ZN4core3ptr42drop_in_place$LT$alloc..string..String$GT$17h2767dddabd238b94E>
   0x000055555556aaab <+59>:    jmp    0x55555556ab0a <_ZN9rust_heap4main17h9468d83c908b7b2bE+154>
   0x000055555556aaad <+61>:    mov    %rax,%rcx
   0x000055555556aab0 <+64>:    mov    %edx,%eax
   0x000055555556aab2 <+66>:    mov    %rcx,0x78(%rsp)
   0x000055555556aab7 <+71>:    mov    %eax,0x80(%rsp)
   0x000055555556aabe <+78>:    jmp    0x55555556aaa1 <_ZN9rust_heap4main17h9468d83c908b7b2bE+49>

3           println!("a={}", a);
   0x000055555556aac0 <+80>:    movups 0x68(%rsp),%xmm0
   0x000055555556aac5 <+85>:    movaps %xmm0,0x50(%rsp)
   0x000055555556aaca <+90>:    lea    0x41357(%rip),%rsi        # 0x5555555abe28
   0x000055555556aad1 <+97>:    lea    0x20(%rsp),%rdi
   0x000055555556aad6 <+102>:   lea    0x50(%rsp),%rdx
   0x000055555556aadb <+107>:   call   0x555555569bf0 <_ZN4core3fmt2rt38_$LT$impl$u20$core..fmt..Arguments$GT$6new_v117hb30cbabe5203282dE>
   0x000055555556aae0 <+112>:   jmp    0x55555556aae2 <_ZN9rust_heap4main17h9468d83c908b7b2bE+114>
   0x000055555556aae2 <+114>:   mov    0x43edf(%rip),%rax        # 0x5555555ae9c8
   0x000055555556aae9 <+121>:   lea    0x20(%rsp),%rdi
   0x000055555556aaee <+126>:   call   *%rax
   0x000055555556aaf0 <+128>:   jmp    0x55555556aaf2 <_ZN9rust_heap4main17h9468d83c908b7b2bE+130>

4       }
   0x000055555556aaf2 <+130>:   lea    0x8(%rsp),%rdi
   0x000055555556aaf7 <+135>:   call   0x55555556a1a0 <_ZN4core3ptr42drop_in_place$LT$alloc..string..String$GT$17h2767dddabd238b94E>
   0x000055555556aafc <+140>:   add    $0x88,%rsp
   0x000055555556ab03 <+147>:   ret

1       fn main() {
   0x000055555556ab04 <+148>:   call   *0x43e7e(%rip)        # 0x5555555ae988
   0x000055555556ab0a <+154>:   mov    0x78(%rsp),%rdi
   0x000055555556ab0f <+159>:   call   0x5555555aad00 <_Unwind_Resume@plt>
End of assembler dump.
(gdb) 

Well this is way more than what was happening in the C version of this binary. Because String is an object it’s calling its constructor: call 0x55555556a8b0 <_ZN76_$LT$alloc..string..String$u20$as$u20$core..convert..From$LT$$RF$str$GT$$GT$4from17hd60d7e630ec9d734E>, this is a mangled function name but essentially allocates the String in the heap and returns information about the object. The function itself is not important (although there’s a lot happening inside), what’s important is how Rust prepares the arguments for the function and how it retrieves them back:

0x000055555556aa77 <+7>:     lea    -0xca43(%rip),%rsi        # 0x55555555e03b
0x000055555556aa7e <+14>:    lea    0x8(%rsp),%rdi
0x000055555556aa83 <+19>:    mov    %rdi,(%rsp)
0x000055555556aa87 <+23>:    mov    $0xc,%edx
0x000055555556aa8c <+28>:    call   0x55555556a8b0 <_ZN76_$LT$alloc..string..String$u20$as$u20$core..convert..From$LT$$RF$str$GT$$GT$4from17hd60d7e630ec9d734E>
0x000055555556aa91 <+33>:    mov    (%rsp),%rsi
0x000055555556aa95 <+37>:    lea    0x68(%rsp),%rdi

TODO: Continue.