Paging

Justas Masiulis 2018

So what is paging?

Paging is the process of translating linear (virtual) addresses into physical addresses, which are then used to access memory or memory-mapped I/O devices

I'll be talking mainly about 4 level paging

A top-down explanation


// Translate a linear (virtual) address to a physical one by walking
// the four levels of page tables (4 KiB pages only — large pages are
// handled later in the deck).
//
// Each entry's 40-bit address field holds bits 12..51 of the next
// level's physical address (a page-frame number), so it must be
// shifted left by 12 before it can be used as a physical address.
uint64_t virtual_to_physical(linear_addr a)
{
  cr3_t cr3 = readcr3();
  auto pml4 = phys_mem<pml4_t>(cr3.pml4_addr << 12)[a.pml4_idx];
  auto pdpt = phys_mem<pdpt_t>(pml4.dir_ptr_addr << 12)[a.dir_ptr_idx];
  auto pde  = phys_mem<pd_t>(pdpt.dir_addr << 12)[a.dir_idx];
  auto pte  = phys_mem<pt_t>(pde.table_addr << 12)[a.table_idx];
  // physical address = page-frame base + byte offset inside the page
  return (pte.page_frame << 12) + a.page_offset;
}
					

// Translate a linear (virtual) address to a physical one by walking
// the four levels of page tables (4 KiB pages only — large pages are
// handled later in the deck).
//
// Each entry's 40-bit address field holds bits 12..51 of the next
// level's physical address (a page-frame number), so it must be
// shifted left by 12 before it can be used as a physical address.
uint64_t virtual_to_physical(linear_addr a)
{
  cr3_t cr3 = readcr3();
  auto pml4 = phys_mem<pml4_t>(cr3.pml4_addr << 12)[a.pml4_idx];
  auto pdpt = phys_mem<pdpt_t>(pml4.dir_ptr_addr << 12)[a.dir_ptr_idx];
  auto pde  = phys_mem<pd_t>(pdpt.dir_addr << 12)[a.dir_idx];
  auto pte  = phys_mem<pt_t>(pde.table_addr << 12)[a.table_idx];
  // physical address = page-frame base + byte offset inside the page
  return (pte.page_frame << 12) + a.page_offset;
}
						

// Decomposition of a 48-bit linear address under 4-level paging
// with 4 KiB pages: 12 offset bits + four 9-bit table indices.
struct linear_addr {
  uint64_t page_offset       : 12; // byte offset inside the 4 KiB page
  uint64_t table_idx         : 9;  // index into the page table (PT)
  uint64_t dir_idx           : 9;  // index into the page directory (PD)
  uint64_t dir_ptr_idx       : 9;  // index into the page-directory-pointer table (PDPT)
  uint64_t pml4_idx          : 9;  // index into the PML4
  uint64_t reserved          : 16; // bits 48..63 — must sign-extend bit 47 ("canonical" form)
};
						
Canonical address

// Translate a linear (virtual) address to a physical one by walking
// the four levels of page tables (4 KiB pages only — large pages are
// handled later in the deck).
//
// Each entry's 40-bit address field holds bits 12..51 of the next
// level's physical address (a page-frame number), so it must be
// shifted left by 12 before it can be used as a physical address.
uint64_t virtual_to_physical(linear_addr a)
{
  cr3_t cr3 = readcr3();
  auto pml4 = phys_mem<pml4_t>(cr3.pml4_addr << 12)[a.pml4_idx];
  auto pdpt = phys_mem<pdpt_t>(pml4.dir_ptr_addr << 12)[a.dir_ptr_idx];
  auto pde  = phys_mem<pd_t>(pdpt.dir_addr << 12)[a.dir_idx];
  auto pte  = phys_mem<pt_t>(pde.table_addr << 12)[a.table_idx];
  // physical address = page-frame base + byte offset inside the page
  return (pte.page_frame << 12) + a.page_offset;
}
						

// Layout of the CR3 control register under 4-level paging: the
// physical location of the root (PML4) table plus two cache-control
// flags for accesses to it.
struct cr3_t {
  uint64_t ignored       : 3;
  uint64_t write_trough  : 1; // PWT — page-level write-through for the PML4 table
                              // NOTE(review): "trough" is a typo for "through";
                              // left as-is since renaming a member breaks users
  uint64_t cache_disable : 1; // PCD — page-level cache disable for the PML4 table
  uint64_t ignored_2     : 7;
  uint64_t pml4_addr     : 40; // page-frame number of the PML4 table (physical address >> 12)
  uint64_t reserved      : 12; // must be 0
};
						

mov rax, cr3 ; read the CR3 control register into RAX (privileged instruction)
						

// Translate a linear (virtual) address to a physical one by walking
// the four levels of page tables (4 KiB pages only — large pages are
// handled later in the deck).
//
// Each entry's 40-bit address field holds bits 12..51 of the next
// level's physical address (a page-frame number), so it must be
// shifted left by 12 before it can be used as a physical address.
uint64_t virtual_to_physical(linear_addr a)
{
  cr3_t cr3 = readcr3();
  auto pml4 = phys_mem<pml4_t>(cr3.pml4_addr << 12)[a.pml4_idx];
  auto pdpt = phys_mem<pdpt_t>(pml4.dir_ptr_addr << 12)[a.dir_ptr_idx];
  auto pde  = phys_mem<pd_t>(pdpt.dir_addr << 12)[a.dir_idx];
  auto pte  = phys_mem<pt_t>(pde.table_addr << 12)[a.table_idx];
  // physical address = page-frame base + byte offset inside the page
  return (pte.page_frame << 12) + a.page_offset;
}
						

// Return a virtual pointer of type T through which the given physical
// address can be accessed, by asking the kernel for the reverse
// (physical -> virtual) translation.
template<class T>
T* phys_mem(uint64_t phys_address)
{
  // named casts instead of C-style casts: intent is explicit and greppable
  return static_cast<T*>(MmGetVirtualForPhysical(reinterpret_cast<void*>(phys_address)));
}
						

// Base virtual address of a one-time mapping of physical memory;
// zero until the first phys_mem() call creates the mapping.
uint64_t phys_mem_addr = 0;

// Return a virtual pointer of type T for the given physical address,
// by lazily mapping physical memory into virtual address space once
// and then offsetting into that mapping.
template<class T>
T* phys_mem(uint64_t phys_address)
{
  if(!phys_mem_addr) { // fixed: original tested the undefined name `physical_memory_address`
	// NOTE(review): MmGetPhysicalMemoryRanges returns a pointer to a
	// zero-terminated ARRAY of ranges; using a single range as the
	// slide does only covers the first contiguous region — confirm
	// intent before using this outside a demo.
	auto range = MmGetPhysicalMemoryRanges();
	phys_mem_addr = (uint64_t)MmMapIoSpace(
			range.BaseAddress,
			range.NumberOfBytes.QuadPart,
			MmCached); // fixed: statement was missing its ';'
  }
  return (T*)(phys_mem_addr + phys_address);
}
						

// Top-level (level 4) page-table entry: points at one page-directory-
// pointer table and carries access-control bits that apply to the
// entire 512 GiB region it covers.
struct pml4_t { // PML4E
  uint64_t present            : 1; // P   — entry is valid; if 0 the other bits are free for the OS
  uint64_t writeable          : 1; // R/W — writes allowed through this entry
  uint64_t user               : 1; // U/S — user-mode access allowed
  uint64_t write_through      : 1; // PWT
  uint64_t cache_disable      : 1; // PCD
  uint64_t accessed           : 1; // A — set by the CPU on first use
  uint64_t ignored            : 1;
  uint64_t reserved           : 1; // PS must be 0 — no 512 GiB pages exist
  uint64_t ignored_2          : 4;
  uint64_t dir_ptr_addr       : 40; // page-frame number of the PDPT (physical address >> 12)
  uint64_t reserved_2         : 1; // must be 0
  uint64_t ignored_3          : 10;
  uint64_t non_executable     : 1; // XD — instruction fetches disallowed in the region
};							
						

// Translate a linear (virtual) address to a physical one by walking
// the four levels of page tables (4 KiB pages only — large pages are
// handled later in the deck).
//
// Each entry's 40-bit address field holds bits 12..51 of the next
// level's physical address (a page-frame number), so it must be
// shifted left by 12 before it can be used as a physical address.
uint64_t virtual_to_physical(linear_addr a)
{
  cr3_t cr3 = readcr3();
  auto pml4 = phys_mem<pml4_t>(cr3.pml4_addr << 12)[a.pml4_idx];
  auto pdpt = phys_mem<pdpt_t>(pml4.dir_ptr_addr << 12)[a.dir_ptr_idx];
  auto pde  = phys_mem<pd_t>(pdpt.dir_addr << 12)[a.dir_idx];
  auto pte  = phys_mem<pt_t>(pde.table_addr << 12)[a.table_idx];
  // physical address = page-frame base + byte offset inside the page
  return (pte.page_frame << 12) + a.page_offset;
}
						

// Level-3 page-directory-pointer-table entry: points at one page
// directory (this simplified version ignores the 1 GiB-page case;
// a two-view layout appears later in the deck).
struct pdpt_t { // PDPTE
  uint64_t present        : 1; // P   — entry is valid
  uint64_t writeable      : 1; // R/W — writes allowed through this entry
  uint64_t user           : 1; // U/S — user-mode access allowed
  uint64_t write_through  : 1; // PWT
  uint64_t cache_disable  : 1; // PCD
  uint64_t accessed       : 1; // A — set by the CPU on first use
  uint64_t ignored        : 1;
  uint64_t page_size      : 1; // PS — if 1, this entry maps a 1 GiB page and the layout below differs
  uint64_t ignored_2      : 4;
  uint64_t dir_addr       : 40; // page-frame number of the page directory (physical address >> 12)
  uint64_t reserved_2     : 1; // must be 0
  uint64_t ignored_3      : 10;
  uint64_t non_executable : 1; // XD — instruction fetches disallowed in the region
};
						

// Level-2 page-directory entry: points at one page table.
// Renamed from `pdt` to `pd_t` — every virtual_to_physical slide
// instantiates phys_mem<pd_t>, so `pdt` was a typo.
struct pd_t { // PDE
  uint64_t present        : 1; // P   — entry is valid
  uint64_t writeable      : 1; // R/W — writes allowed through this entry
  uint64_t user           : 1; // U/S — user-mode access allowed
  uint64_t write_through  : 1; // PWT
  uint64_t cache_disable  : 1; // PCD
  uint64_t accessed       : 1; // A — set by the CPU on first use
  uint64_t ignored        : 1;
  uint64_t page_size      : 1; // PS — if 1, this entry maps a 2 MiB page instead
  uint64_t ignored_2      : 4;
  uint64_t table_addr     : 40; // page-frame number of the page table (physical address >> 12)
  uint64_t reserved_2     : 1; // must be 0
  uint64_t ignored_3      : 10;
  uint64_t non_executable : 1; // XD — instruction fetches disallowed in the region
};
						

// Translate a linear (virtual) address to a physical one by walking
// the four levels of page tables (4 KiB pages only — large pages are
// handled later in the deck).
//
// Each entry's 40-bit address field holds bits 12..51 of the next
// level's physical address (a page-frame number), so it must be
// shifted left by 12 before it can be used as a physical address.
uint64_t virtual_to_physical(linear_addr a)
{
  cr3_t cr3 = readcr3();
  auto pml4 = phys_mem<pml4_t>(cr3.pml4_addr << 12)[a.pml4_idx];
  auto pdpt = phys_mem<pdpt_t>(pml4.dir_ptr_addr << 12)[a.dir_ptr_idx];
  auto pde  = phys_mem<pd_t>(pdpt.dir_addr << 12)[a.dir_idx];
  auto pte  = phys_mem<pt_t>(pde.table_addr << 12)[a.table_idx];
  // physical address = page-frame base + byte offset inside the page
  return (pte.page_frame << 12) + a.page_offset;
}
						

// Level-1 page-table entry: the leaf that maps one 4 KiB page.
struct pt_t { // PTE
  uint64_t present        : 1; // P   — entry is valid
  uint64_t writeable      : 1; // R/W — writes allowed to the page
  uint64_t user           : 1; // U/S — user-mode access allowed
  uint64_t write_through  : 1; // PWT
  uint64_t cache_disable  : 1; // PCD
  uint64_t accessed       : 1; // A — set by the CPU on first access
  uint64_t dirty          : 1; // D — set by the CPU on first write
  uint64_t pat            : 1; // PAT — selects a Page Attribute Table entry
  uint64_t global         : 1; // G — translation survives CR3 reloads in the TLB
  uint64_t ignored_2      : 3;
  uint64_t page_frame     : 40; // page-frame number of the mapped page (physical address >> 12)
  uint64_t reserved_2     : 1; // must be 0
  uint64_t ignored_3      : 6;
  uint64_t key            : 4; // protection key — presumably MPK bits 59..62; confirm against the SDM
  uint64_t non_executable : 1; // XD — instruction fetches from the page disallowed
};
							

Are we done?

Nope


// Full translation, including 1 GiB and 2 MiB large pages.
//
// A page-frame field holds the physical address with the page-size
// number of low bits stripped, so each one is shifted back into place
// (12 for tables/4 KiB pages, 21 for 2 MiB, 30 for 1 GiB).
uint64_t virtual_to_physical(linear_addr a) {
  cr3_t cr3 = readcr3();
  auto pml4 = phys_mem<pml4_t>(cr3.pml4_addr << 12)[a._4k.pml4_idx];
  auto pdpt = phys_mem<pdpt_t>(pml4.dir_ptr_addr << 12)[a._4k.dir_ptr_idx];
  if(pdpt.ref.page_size) // PS set at the PDPT level -> 1 GiB page
	return (pdpt.mapped.page_frame << 30) + a._1g.page_offset;

  auto pde  = phys_mem<pd_t>(pdpt.ref.dir_addr << 12)[a._4k.dir_idx];
  if(pde.ref.page_size) // PS set at the PD level -> 2 MiB page
	return (pde.mapped.page_frame << 21) + a._2m.page_offset;

  // fixed: original read `a.table_idx`, but table_idx lives in the _4k view
  auto pte  = phys_mem<pt_t>(pde.ref.table_addr << 12)[a._4k.table_idx];
  return (pte.page_frame << 12) + a._4k.page_offset;
}
						

// Three alternative decompositions of the same 64-bit linear address,
// one per page size; `reserved` must always sign-extend bit 47.
union linear_addr {
  struct { // 4 KiB page: 12 offset bits, four 9-bit indices
    uint64_t page_offset       : 12;
    uint64_t table_idx         : 9;
    uint64_t dir_idx           : 9;
    uint64_t dir_ptr_idx       : 9;
    uint64_t pml4_idx          : 9;
    uint64_t reserved          : 16;
  } _4k;
  struct { // 2 MiB page: 21 offset bits, no page-table index
    uint64_t page_offset       : 21;
    uint64_t dir_idx           : 9;
    uint64_t dir_ptr_idx       : 9;
    uint64_t pml4_idx          : 9;
    uint64_t reserved          : 16;
  } _2m; // fixed: was a duplicate `_4k`, which does not compile and is
         // referenced as `a._2m` by the translation code
  struct { // 1 GiB page: 30 offset bits, no directory/table indices
    uint64_t page_offset       : 30;
    uint64_t dir_ptr_idx       : 9;
    uint64_t pml4_idx          : 9;
    uint64_t reserved          : 16;
  } _1g;
};
						

// PDPT entry with two views of the same 64-bit word: `ref` when it
// references a page directory (PS = 0) and `mapped` when it maps a
// 1 GiB page directly (PS = 1).
// Fixed: this must be a `union` — the translation code reads both
// `pdpt.ref` and `pdpt.mapped` from one 8-byte entry, and the
// parallel pd_t type below is already a union; as a `struct` the two
// members would occupy 16 separate bytes.
union pdpt_t { // PDPTE
  struct { // PS = 0: references a page directory
    uint64_t present        : 1; // P
    uint64_t writeable      : 1; // R/W
    uint64_t user           : 1; // U/S
    uint64_t write_through  : 1; // PWT
    uint64_t cache_disable  : 1; // PCD
    uint64_t accessed       : 1; // A
    uint64_t ignored        : 1;
    uint64_t page_size      : 1; // PS — 0 in this view
    uint64_t ignored_2      : 4;
    uint64_t dir_addr       : 40; // page-frame number of the page directory
    uint64_t reserved_2     : 1; // must be 0
    uint64_t ignored_3      : 10;
    uint64_t non_executable : 1; // XD
  } ref;

  struct { // PS = 1: maps a 1 GiB page
    uint64_t present        : 1; // P
    uint64_t writeable      : 1; // R/W
    uint64_t user           : 1; // U/S
    uint64_t write_through  : 1; // PWT
    uint64_t cache_disable  : 1; // PCD
    uint64_t accessed       : 1; // A
    uint64_t dirty          : 1; // D — set by the CPU on first write
    uint64_t page_size      : 1; // PS — 1 in this view
    uint64_t global         : 1; // G
    uint64_t ignored        : 3;
    uint64_t pat            : 1; // PAT
    uint64_t reserved       : 18; // must be 0
    uint64_t page_frame     : 21; // bits 30..51 of the 1 GiB page's physical address
    uint64_t reserved_2     : 1; // must be 0
    uint64_t ignored_3      : 6;
    uint64_t key            : 4; // protection key
    uint64_t non_executable : 1; // XD
  } mapped;
};
						

// Page-directory entry with two views of the same 64-bit word: `ref`
// when it references a page table (PS = 0) and `mapped` when it maps
// a 2 MiB page directly (PS = 1).
union pd_t { // PDE
  struct { // PS = 0: references a page table
    uint64_t present        : 1; // P
    uint64_t writeable      : 1; // R/W
    uint64_t user           : 1; // U/S
    uint64_t write_through  : 1; // PWT
    uint64_t cache_disable  : 1; // PCD
    uint64_t accessed       : 1; // A
    uint64_t ignored        : 1;
    uint64_t page_size      : 1; // PS — 0 in this view
    uint64_t ignored_2      : 4;
    uint64_t table_addr     : 40; // page-frame number of the page table
    uint64_t reserved_2     : 1; // must be 0
    uint64_t ignored_3      : 10;
    uint64_t non_executable : 1; // XD
  } ref;

  struct { // PS = 1: maps a 2 MiB page
    uint64_t present        : 1; // P
    uint64_t writeable      : 1; // R/W
    uint64_t user           : 1; // U/S
    uint64_t write_through  : 1; // PWT
    uint64_t cache_disable  : 1; // PCD
    uint64_t accessed       : 1; // A
    uint64_t dirty          : 1; // D — set by the CPU on first write
    uint64_t page_size      : 1; // PS — 1 in this view
    uint64_t global         : 1; // G
    uint64_t ignored        : 3;
    uint64_t pat            : 1; // PAT
    uint64_t reserved       : 8; // must be 0
    uint64_t page_frame     : 31; // bits 21..51 of the 2 MiB page's physical address
    uint64_t reserved_2     : 1; // must be 0
    uint64_t ignored_2      : 6;
    uint64_t key            : 4; // protection key
    uint64_t non_executable : 1; // XD
  } mapped;
};
						

Quick overview of other paging modes

32 bit with PAE

32 bit virtual address

3 levels with 8 byte entries

top level has 4 entries

32 bit

32 bit virtual address

2 levels with 4 byte entries

That's it

References:

Intel software developer manual