golang memory management

memory management

memory allocator

Linear Allocation (Bump Allocator)

A pointer marks the start of the free region; each allocation returns the current address and bumps the pointer forward so the next allocation starts right after it (a minimal sketch follows the list below).

  1. Low complexity and high allocation efficiency
  2. Freed memory cannot be reused directly
    1. Requires a garbage collector (mark-compact, copying, or generational collection) to compact live objects and clean up fragmentation
    2. Cannot be used as-is by languages such as C and C++ that expose raw pointers, because compaction would invalidate those pointers
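
To make the idea concrete, here is a minimal bump-allocator sketch (illustrative only; the type and field names are not from the runtime):

package main

import "fmt"

// bumpAllocator hands out memory from a fixed buffer by advancing an offset.
type bumpAllocator struct {
	buf    []byte
	offset int
}

// alloc returns the next free region and bumps the offset forward;
// freed memory is never reused, which is why a compacting GC is needed.
func (a *bumpAllocator) alloc(size int) []byte {
	if a.offset+size > len(a.buf) {
		return nil // out of memory
	}
	p := a.buf[a.offset : a.offset+size]
	a.offset += size
	return p
}

func main() {
	a := &bumpAllocator{buf: make([]byte, 64)}
	fmt.Println(len(a.alloc(16)), a.offset) // 16 16
	fmt.Println(len(a.alloc(32)), a.offset) // 32 48
}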

Free-List Allocator

Free memory blocks are managed in a linked list; allocation walks the list and updates its links to hand out a block.

  1. Reclaimed memory is easy to reuse
  2. Allocation requires traversing the list, which is O(n); common strategies:
    1. First-Fit: traverse from the head of the list and pick the first block larger than the requested size (sketched below)
    2. Next-Fit: traverse from where the previous search ended and pick the first block larger than the requested size
    3. Best-Fit: traverse from the head of the list and pick the block that fits the request most closely
    4. Segregated-Fit: memory is split into multiple linked lists, each holding blocks of one size; a request first selects the appropriate list and then takes a block from it
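
A minimal first-fit sketch of the idea (illustrative; the block type and firstFit helper are not runtime code):

package main

import "fmt"

// block is a node in a singly linked free list.
type block struct {
	size int
	next *block
}

// firstFit walks the free list and returns the first block that is large
// enough, unlinking it from the list; nil means no block fits.
func firstFit(head **block, size int) *block {
	for prev, cur := (*block)(nil), *head; cur != nil; prev, cur = cur, cur.next {
		if cur.size >= size {
			if prev == nil {
				*head = cur.next
			} else {
				prev.next = cur.next
			}
			cur.next = nil
			return cur
		}
	}
	return nil
}

func main() {
	// free list: 8 -> 32 -> 64
	head := &block{size: 8, next: &block{size: 32, next: &block{size: 64}}}
	b := firstFit(&head, 24)
	fmt.Println(b.size, head.size) // 32 8
}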

runtime/mfixalloc.go

// fixalloc: a simple free-list allocator for fixed-size objects,
// used by the runtime for internal structures such as mspan and mcache
type fixalloc struct {
	size   uintptr // object size
	first  func(arg, p unsafe.Pointer) // called first time p is returned
	arg    unsafe.Pointer
	list   *mlink
	chunk  uintptr // use uintptr instead of unsafe.Pointer to avoid write barriers
	nchunk uint32
	inuse  uintptr // in-use bytes now
	stat   *sysMemStat
	zero   bool // zero allocations
}

Thread cache allocation (Thread-Caching Malloc, TCMalloc)

Go's memory allocator borrows from TCMalloc: it uses multiple levels of caching and chooses a different allocation strategy depending on the object size.

object size

category       size
micro object   (0, 16B)
small object   [16B, 32KB]
large object   (32KB, +∞)

runtime/sizeclasses.go

// class       span class id; every mspan has one
// bytes/obj   object size in bytes for this class
// bytes/span  bytes occupied by each span (number of pages * page size)
// objects     number of objects per span = (bytes/span) / (bytes/obj)
// tail waste  leftover bytes per span = (bytes/span) % (bytes/obj)
// max waste   worst-case fraction of a span wasted when every object is the smallest size that maps to this class
// class  bytes/obj  bytes/span  objects  tail waste  max waste
//     1          8        8192     1024           0     87.50%
//     2         16        8192      512           0     43.75%
//     3         24        8192      341           8     29.24%
//     4         32        8192      256           0     21.88%
//     5         48        8192      170          32     31.52%
//     6         64        8192      128           0     23.44%
//     7         80        8192      102          32     19.07%
//     8         96        8192       85          32     15.95%
//     9        112        8192       73          16     13.56%
//    10        128        8192       64           0     11.72%
//    11        144        8192       56         128     11.82%
//    12        160        8192       51          32      9.73%
//    13        176        8192       46          96      9.59%
//    14        192        8192       42         128      9.25%
//    15        208        8192       39          80      8.12%
//    16        224        8192       36         128      8.15%
//    17        240        8192       34          32      6.62%
//    18        256        8192       32           0      5.86%
//    19        288        8192       28         128     12.16%
//    20        320        8192       25         192     11.80%
//    21        352        8192       23          96      9.88%
//    22        384        8192       21         128      9.51%
//    23        416        8192       19         288     10.71%
//    24        448        8192       18         128      8.37%
//    25        480        8192       17          32      6.82%
//    26        512        8192       16           0      6.05%
//    27        576        8192       14         128     12.33%
//    28        640        8192       12         512     15.48%
//    29        704        8192       11         448     13.93%
//    30        768        8192       10         512     13.94%
//    31        896        8192        9         128     15.52%
//    32       1024        8192        8           0     12.40%
//    33       1152        8192        7         128     12.41%
//    34       1280        8192        6         512     15.55%
//    35       1408       16384       11         896     14.00%
//    36       1536        8192        5         512     14.00%
//    37       1792       16384        9         256     15.57%
//    38       2048        8192        4           0     12.45%
//    39       2304       16384        7         256     12.46%
//    40       2688        8192        3         128     15.59%
//    41       3072       24576        8           0     12.47%
//    42       3200       16384        5         384      6.22%
//    43       3456       24576        7         384      8.83%
//    44       4096        8192        2           0     15.60%
//    45       4864       24576        5         256     16.65%
//    46       5376       16384        3         256     10.92%
//    47       6144       24576        4           0     12.48%
//    48       6528       32768        5         128      6.23%
//    49       6784       40960        6         256      4.36%
//    50       6912       49152        7         768      3.37%
//    51       8192        8192        1           0     15.61%
//    52       9472       57344        6         512     14.28%
//    53       9728       49152        5         512      3.64%
//    54      10240       40960        4           0      4.99%
//    55      10880       32768        3         128      6.24%
//    56      12288       24576        2           0     11.45%
//    57      13568       40960        3         256      9.99%
//    58      14336       57344        4           0      5.35%
//    59      16384       16384        1           0     12.49%
//    60      18432       73728        4           0     11.11%
//    61      19072       57344        3         128      3.57%
//    62      20480       40960        2           0      6.87%
//    63      21760       65536        3         256      6.25%
//    64      24576       24576        1           0     11.45%
//    65      27264       81920        3         128     10.00%
//    66      28672       57344        2           0      4.91%
//    67      32768       32768        1           0     12.50%
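
For example, a 100-byte allocation is rounded up to class 9 (112 bytes): each 8192-byte span then holds 8192/112 = 73 objects with 8192 - 73*112 = 16 bytes of tail waste, matching the table, and each object carries 112 - 100 = 12 bytes of internal fragmentation.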

multilevel cache

  1. Thread Cache
    1. Per-thread, so there is no contention and no locking
    2. Limited in size; serves small objects, while large objects go directly to the page heap
  2. Central Cache: shared by all threads, one free list per size class, protected by locks; refills the thread caches
  3. Page Heap: manages memory in units of pages and serves large objects; refills the central caches

Address space state transition

state

state      explanation
None       the memory is not reserved or mapped; the default state of an address range
Reserved   the runtime holds the address range, but accessing it results in a fault
Prepared   the memory is reserved, generally without backing physical memory; accessing it is undefined behavior, but it can transition quickly to Ready
Ready      can be accessed safely

state transition function

name                explanation
runtime.sysAlloc    obtains a large chunk of available memory from the operating system, typically hundreds of KB or several MB
runtime.sysFree     returns memory unconditionally; called, for example, when the program runs out of memory (OOM)
runtime.sysReserve  reserves an address range from the operating system; accessing it triggers a fault
runtime.sysUsed     notifies the operating system that the memory region is needed and must be safely accessible
runtime.sysUnused   notifies the operating system that the physical memory backing a virtual range is no longer needed and can be reused
runtime.sysFault    transitions a memory region to the reserved state; mainly used for runtime debugging
runtime.sysMap      ensures the memory region can transition quickly to the Ready state
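
On Linux, for example, these functions map onto mmap/munmap/madvise: sysAlloc and sysMap call mmap with read/write protection, sysReserve calls mmap with PROT_NONE, sysUnused calls madvise (MADV_FREE or MADV_DONTNEED), and sysFree calls munmap; the exact calls vary by platform and Go version.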

(Figure: memory region states and transitions — https://img.draveness.me/2020-02-29-15829868066474-memory-regions-states-and-transitions.png)

memory alignment

Data is stored at memory addresses that are integer multiples of its size so that the CPU can fetch it from memory in a single access.

advantage

  1. Improves portability: the compiler performs alignment at compile time, smoothing over differences between CPUs.
  2. Improves memory access efficiency: a 32-bit CPU reads 4 bytes per access and a 64-bit CPU reads 8 bytes per access (the CPU word size); padding data to a multiple of the word size avoids reads that straddle two words.

shortcoming

  1. Trades wasted memory space for access time

structure

  1. The offset of each member within the struct must be an integer multiple of that member's size (its alignment)
  2. The total size of the struct must be an integer multiple of the largest member's alignment (1/4/8/16...), which may add trailing padding (see the example below)
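
A quick way to observe both rules is unsafe.Sizeof/Alignof/Offsetof; a minimal example on a 64-bit platform (the bad/good type names are illustrative):

package main

import (
	"fmt"
	"unsafe"
)

// On a 64-bit platform, padding makes these two layouts differ in size.
type bad struct {
	a bool  // offset 0; 7 bytes of padding so b starts at a multiple of 8
	b int64 // offset 8
	c bool  // offset 16; 7 bytes of trailing padding round the size up to 24
}

type good struct {
	b int64 // offset 0
	a bool  // offset 8
	c bool  // offset 9; 6 bytes of trailing padding round the size up to 16
}

func main() {
	fmt.Println(unsafe.Sizeof(bad{}), unsafe.Alignof(bad{}))   // 24 8
	fmt.Println(unsafe.Sizeof(good{}), unsafe.Alignof(good{})) // 16 8
	fmt.Println(unsafe.Offsetof(bad{}.b))                      // 8
}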

memory escape

Each function stores its parameters, local variables, return values, and so on in a stack frame on the goroutine's stack, and the frame is destroyed automatically when the function returns.
If a variable is still needed after the function returns, it must be allocated on the heap instead; this is called memory escape (escaping to the heap).

escape mechanism

  1. If a variable is not referenced outside the function, it is preferentially placed on the stack
  2. If a variable is referenced outside the function, it must be placed on the heap
  3. If a variable does not fit on the stack, it must be placed on the heap

escape analysis

go build -gcflags=-m main.go
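The -m flag makes the compiler print its escape-analysis and inlining decisions; adding -l (for example go build -gcflags="-m -l" main.go) disables inlining, which usually makes the escape output easier to read.
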
  1. pointer escape
package main

func escape1() *int{
  var a int = 1
  return &a
}

func main(){
  escape1()
}

// ./main.go:4:6: moved to heap: a
  2. Insufficient stack space
package main

func escape2() {
  s := make([]int, 100000)
  for index := range s {
    s[index] = index
  }
}

func main() {
  escape2()
}

// ./main.go:4: make([]int, 100000) escapes to heap
  3. Size not known at compile time
package main

func escape3() {
  number := 10
  s := make([]int, number)
  for index := 0; index < len(s); index++ {
    s[index] = index
  }
}

func main(){
  escape3()
}

//./main.go:18:6: can inline escape3
//./main.go:15:9: inlining call to escape3
//./main.go:15:9: make([]int, number) escapes to heap
//./main.go:20:10: make([]int, number) escapes to heap
  4. Dynamic type
package main

import "fmt"

func escape4() {
  fmt.Println(1111)
}

func main() {
  escape4()
}

//./main.go:33:6: can inline escape4
//./main.go:34:13: inlining call to fmt.Println
//./main.go:6:6: can inline main
//./main.go:10:9: inlining call to escape4
//./main.go:10:9: inlining call to fmt.Println
//./main.go:10:9: 1111 escapes to heap
//./main.go:10:9: []interface {}{...} does not escape
//./main.go:34:14: 1111 escapes to heap
//./main.go:34:13: []interface {}{...} does not escape
//<autogenerated>:1: .this does not escape
  5. Closure reference
package main

func escape5() func() int {
  var i int = 1
  return func() int {
    i++
    return i
  }
}

func main(){
  escape5()
}
//./main.go:36:9: can inline escape5.func1
//./main.go:4:6: can inline main
//./main.go:35:6: moved to heap: i
//./main.go:36:9: func literal escapes to heap

Summary

  1. Stack allocation is more efficient than heap allocation and does not require GC
  2. Escape analysis determines at compile time whether a value lives on the stack or on the heap, which helps analyze GC-related performance bottlenecks
  3. Values whose pointers escape the function are allocated on the heap; for small values, passing by value is usually cheaper than passing a pointer

go memory management

virtual memory layout

Go 1.10 and earlier use a contiguous memory layout

  • spans

    • Stores pointers to memory management units (mspan); each pointer covers 1 or N pages
    • 512MB = 512GB / 8KB (page size) * 8B (pointer size)
  • bitmap

    • Each byte marks whether the corresponding 32 bytes of the arena are in use
    • Mainly used by the GC
    • 16GB = 512GB / 32B * 1B
  • arena

    • The actual heap
    • 512GB, page size = 8KB

Go 1.11 and later introduce a two-dimensional sparse memory layout

  • [l1][l2]heapArena
    • On the Linux x86-64 architecture l1 = 1 and l2 = 4194304; the index array takes 32MB = 4194304 * 8B (pointer size), and each heapArena manages 64MB of memory
    • 4M * 64MB = 256TB of addressable heap

// runtime.heapArena
type heapArena struct {
	bitmap       [heapArenaBitmapBytes]byte // heap bitmap: records which words contain pointers (used by the GC)
	spans        [pagesPerArena]*mspan      // the mspan managing each page
	pageInUse    [pagesPerArena / 8]uint8   // bitmap of pages belonging to in-use spans
	pageMarks    [pagesPerArena / 8]uint8   // bitmap of pages containing marked objects
	pageSpecials [pagesPerArena / 8]uint8   // bitmap of pages containing specials (e.g. finalizers)
	checkmarks   *checkmarksMap             // debug checkmark state
	zeroedBase   uintptr                    // offset of the first unused byte in the arena (memory from here on is still zeroed)
}
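
Mapping a heap address to its heapArena is then simple index arithmetic; a simplified sketch assuming 64MB arenas and the linux/amd64 L1/L2 split above (the real runtime.arenaIndex also applies an arenaBaseOffset):

package main

import "fmt"

const (
	heapArenaBytes = 64 << 20 // 64MB per heapArena
	arenaL2Bits    = 22       // 4194304 = 1<<22 L2 entries on linux/amd64 (L1 has a single entry)
)

// arenaIndex splits an address into the two levels of the arenas array.
func arenaIndex(addr uintptr) (l1, l2 uintptr) {
	idx := addr / heapArenaBytes
	return idx >> arenaL2Bits, idx & (1<<arenaL2Bits - 1)
}

func main() {
	fmt.Println(arenaIndex(0xc000010000)) // a typical 64-bit Go heap address
}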

go memory management design

  1. Borrows from TCMalloc and uses multi-level caching
    1. mcache: the per-thread (per-P) cache
    2. mcentral: the central cache, one per span class
    3. mheap: the page heap
    4. mspan: the memory management unit, corresponding to one row of the size-class table
  2. The allocator combines linear and free-list allocation
    1. Tiny objects use linear (bump) allocation inside mcache; otherwise they are handled like small objects
    2. Small objects are rounded up to a size class and served from the free lists of mcache, then mcentral, then mheap (see the example below)
    3. Large objects are allocated directly from the free lists in mheap
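
For example, a 40-byte non-pointer struct is rounded up to size class 5 (48 bytes) and served from the mspan that mcache holds for that class, while a 64KB buffer exceeds 32KB and is allocated directly from mheap as a large object.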

(Figure: Go memory layout — https://img.draveness.me/2020-02-29-15829868066479-go-memory-layout.png)

mspan

  1. The memory management unit; both mcache and mheap track memory with this structure
  2. Organized as a linked list
  3. Each mspan manages objects of a single span class from the size-class table
// runtime/mheap.go
//go:notinheap
type mspan struct {
	next *mspan     // next pointer in the mspan linked list
	prev *mspan     // previous pointer in the mspan linked list
	list *mSpanList // the list (head/tail pointers) this mspan belongs to

	// startAddr and npages together describe the block of memory managed by this mspan
	startAddr uintptr // start address of the managed pages
	freeindex uintptr // index at which to start scanning for a free object in the span
	limit     uintptr // end of the data area; for large-object spans, the end of the allocated block

	allocCache uint64  // cached complement of allocBits, used to quickly find a free object
	allocBits  *gcBits // allocation bitmap: which objects are in use
	gcmarkBits *gcBits // GC mark bitmap
	allocCount uint16  // number of allocated objects

	nelems    uintptr       // total number of objects; corresponds to objects in the class table
	spanclass spanClass     // span class id; corresponds to class in the class table
	elemsize  uintptr       // object size; corresponds to bytes/obj in the class table
	npages    uintptr       // number of pages managed by the span (npages * 8KB = bytes/span in the class table)
	state     mSpanStateBox // memory management state

	manualFreeList gclinkptr // free list of objects for manually managed spans

	needzero uint8  // memory needs to be zeroed before allocation
	sweepgen uint32 // sweep generation

	// divMagic: precomputed constants for fast division by elemsize
	divMul    uint16
	baseMask  uint16
	divShift  uint8
	divShift2 uint8

	speciallock mutex    // guards the specials list
	specials    *special // finalizers and other special records
}
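
freeindex and allocCache make finding the next free object cheap: allocCache caches the inverted allocation bits starting at freeindex, so the next free slot is a trailing-zero count away. A simplified sketch of the idea (the real logic lives in runtime.nextFreeFast; this standalone version is illustrative):

package main

import (
	"fmt"
	"math/bits"
)

// nextFreeFast sketches how a span locates its next free object:
// allocCache has a 1 bit for every still-free object at or after freeindex.
func nextFreeFast(allocCache uint64, freeindex, nelems, elemsize, base uintptr) uintptr {
	tz := uintptr(bits.TrailingZeros64(allocCache))
	if tz == 64 {
		return 0 // cache exhausted; the runtime would refill it from allocBits
	}
	idx := freeindex + tz
	if idx >= nelems {
		return 0 // span is full
	}
	return base + idx*elemsize // address of the free object
}

func main() {
	// objects 0 and 1 are taken, so the lowest set bit is at position 2
	fmt.Printf("%#x\n", nextFreeFast(^uint64(0)<<2, 0, 512, 16, 0xc000000000))
}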

mcache

  • Tiny (micro) object allocator: a linear (bump) allocator
  • Small object allocator: caches 136 (68*2) mspans, one per span class, for lock-free use by the owning thread/P
  • Large objects bypass mcache and are allocated directly from mheap; other allocations go through mcache -> mcentral -> mheap
// runtime/mcache.go
//go:notinheap
type mcache struct {

  // Micro object allocator
	tiny       uintptr   //tiny object base address
	tinyoffset uintptr   //The offset of the next free memory
	tinyAllocs uintptr   //tiny object allocation number

  // Small object allocator
  // even span classes hold scannable (pointer-containing) objects; odd classes are noscan
	alloc [numSpanClasses]*mspan // 68*2 mspan lists grouped by span class

	stackcache [_NumStackOrders]stackfreelist // stack cache
	flushGen   uint32                         // sweep generation at which this mcache was last flushed
  nextSample uintptr // trigger heap sample after allocating this many bytes
	scanAlloc  uintptr // bytes of scannable heap allocated
}
// initialization
// called when a P is initialized (at startup or when GOMAXPROCS changes)
func allocmcache() *mcache {
	var c *mcache
	systemstack(func() {
		lock(&mheap_.lock)
		c = (*mcache)(mheap_.cachealloc.alloc())
		c.flushGen = mheap_.sweepgen
		unlock(&mheap_.lock)
	})
	for i := range c.alloc {
    // initialize every span class with the shared empty placeholder span
		c.alloc[i] = &emptymspan
	}
	c.nextSample = nextSample()
	return c
}


// refill: swap a span with no free objects for one fetched from mcentral
func (c *mcache) refill(spc spanClass) {
	
	s := c.alloc[spc]

  // other code
  ...
	// request an mspan from the central cache (mcentral)
	s = mheap_.central[spc].mcentral.cacheSpan()
	if s == nil {
		throw("out of memory")
	}

	if uintptr(s.allocCount) == s.nelems {
		throw("span has no free space")
	}

  // other code
  ...
	c.alloc[spc] = s
}

mcentral

  • Each mcentral manages a single span class from the size-class table

  • It keeps two sets of mspans: those with and those without free objects

    • These sets are shared by all threads and protected by locks
// runtime/mcentral.go
type mcentral struct {
	spanclass spanClass  // span class id
	partial   [2]spanSet // spans with free objects (one swept set, one unswept set)
	full      [2]spanSet // spans with no free objects (one swept set, one unswept set)
}


type spanSet struct {
	spineLock mutex          // multi-threaded access lock
	spine     unsafe.Pointer // pointer to []span
	spineLen  uintptr        // length
	spineCap  uintptr        // capacity
	index headTailIndex      // head index in the top 32 bits, tail index in the bottom 32 bits
}
// initialization
func (c *mcentral) init(spc spanClass) {
	c.spanclass = spc   // initialize spanclass
	lockInit(&c.partial[0].spineLock, lockRankSpanSetSpine) // Lock
	lockInit(&c.partial[1].spineLock, lockRankSpanSetSpine) // Lock
	lockInit(&c.full[0].spineLock, lockRankSpanSetSpine)    // Lock
	lockInit(&c.full[1].spineLock, lockRankSpanSetSpine)    // Lock
}


// cacheSpan hands an mspan with free space to an mcache
func (c *mcentral) cacheSpan() *mspan {
  // other code
  ...
	var s *mspan

	// 1. Try to find from structures that have been cleaned up/contain free space
	if s = c.partialSwept(sg).pop(); s != nil {
		goto havespan
	}

	// 2. Try to find in structures that have not been cleaned up / contain free space
	for ; spanBudget >= 0; spanBudget-- {
		s = c.partialUnswept(sg).pop()
		if s == nil {
			break
		}
		if atomic.Load(&s.sweepgen) == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) {
      //clean up
			s.sweep(true)
			goto havespan
		}
	}

  // 3. Attempt to find in structures that have not been cleaned up/do not contain free space
	for ; spanBudget >= 0; spanBudget-- {
		s = c.fullUnswept(sg).pop()
		if s == nil {
			break
		}
		if atomic.Load(&s.sweepgen) == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) {
			// clean up
			s.sweep(true)
			// Check for free
			freeIndex := s.nextFreeIndex()
			if freeIndex != s.nelems {
				s.freeindex = freeIndex
				goto havespan
			}
      // no free objects even after sweeping: move the span to the swept-full set
			c.fullSwept(sg).push(s)
		}
	}
	// other code
  ...
	//4. mcentral applies to mheap for capacity expansion
	s = c.grow()
	if s == nil {
		return nil
	}

havespan:
	// other code
  ...
	n := int(s.nelems) - int(s.allocCount)
	if n == 0 || s.freeindex == s.nelems || uintptr(s.allocCount) == s.nelems {
		throw("span has no free objects")
	}
	// reset allocCache so it reflects allocation state starting at freeindex
	freeByteBase := s.freeindex &^ (64 - 1)
	whichByte := freeByteBase / 8
	s.refillAllocCache(whichByte)
	s.allocCache >>= s.freeindex % 64

	return s
}


// Expansion
func (c *mcentral) grow() *mspan {
  
	npages := uintptr(class_to_allocnpages[c.spanclass.sizeclass()])
	size := uintptr(class_to_size[c.spanclass.sizeclass()])

	s := mheap_.alloc(npages, c.spanclass, true)
	if s == nil {
		return nil
	}

	// Use division by multiplication and shifts to quickly compute:
	// n := (npages << _PageShift) / size
	n := (npages << _PageShift) >> s.divShift * uintptr(s.divMul) >> s.divShift2
	s.limit = s.base() + size*n
	heapBitsForAddr(s.base()).initSpan(s) // initialize the heap bitmap for the new span
	return s
}


mheap

//go:notinheap
type mheap struct {
	lock      mutex     //multithreaded lock
	pages     pageAlloc // page allocator
	allspans []*mspan // all mspans ever created; protected by the lock
	// arenas: two-dimensional sparse array of heapArenas
	arenas [1 << arenaL1Bits]*[1 << arenaL2Bits]*heapArena
	// central: 136 (68*2) mcentrals, one per span class, padded to avoid false sharing
	central [numSpanClasses]struct {
		mcentral mcentral
		pad      [cpu.CacheLinePadSize - unsafe.Sizeof(mcentral{})%cpu.CacheLinePadSize]byte
	}
	
  //other fields
  ...
}

// runtime.heapArena manages 64MB
type heapArena struct {
	bitmap       [heapArenaBitmapBytes]byte // heap bitmap: records which words contain pointers (used by the GC)
	spans        [pagesPerArena]*mspan      // the mspan managing each page
	pageInUse    [pagesPerArena / 8]uint8   // bitmap of pages belonging to in-use spans
	pageMarks    [pagesPerArena / 8]uint8   // bitmap of pages containing marked objects
	pageSpecials [pagesPerArena / 8]uint8   // bitmap of pages containing specials (e.g. finalizers)
	checkmarks   *checkmarksMap             // debug checkmark state
	zeroedBase   uintptr                    // offset of the first unused byte in the arena (memory from here on is still zeroed)
}
// initialization
func (h *mheap) init() {
	lockInit(&h.lock, lockRankMheap)
	lockInit(&h.speciallock, lockRankMheapSpecial)

	h.spanalloc.init(unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys)
	h.cachealloc.init(unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys)
	h.specialfinalizeralloc.init(unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys)
	h.specialprofilealloc.init(unsafe.Sizeof(specialprofile{}), nil, nil, &memstats.other_sys)
	h.arenaHintAlloc.init(unsafe.Sizeof(arenaHint{}), nil, nil, &memstats.other_sys)

	h.spanalloc.zero = false

  // initialize mcentral
	for i := range h.central {
		h.central[i].mcentral.init(spanClass(i))
	}

	h.pages.init(&h.lock, &memstats.gcMiscSys)
}

// management
// alloc obtains a new mspan of npages pages; it runs on the system stack
func (h *mheap) alloc(npages uintptr, spanclass spanClass, needzero bool) *mspan {
	var s *mspan
	systemstack(func() {
		if h.sweepdone == 0 {
      // Reclaim some memory
			h.reclaim(npages)
		}
    // allocate a new mspan
		s = h.allocSpan(npages, spanAllocHeap, spanclass)
	})
  // other code
  ...
	return s
}

// allocSpan allocates an mspan covering npages pages
func (h *mheap) allocSpan(npages uintptr, typ spanAllocType, spanclass spanClass) (s *mspan) {
	// Function-global state.
	gp := getg()
	base, scav := uintptr(0), uintptr(0)
  // other code
  ...
	pp := gp.m.p.ptr()
	if !needPhysPageAlign && pp != nil && npages < pageCachePages/4 {
		c := &pp.pcache

		// If the processor's page buffer is empty
		if c.empty() {
			lock(&h.lock)
      // refill the P's page cache from the global page allocator
			*c = h.pages.allocToCache()
			unlock(&h.lock)
		}

		// allocate pages from the P's page cache
		base, scav = c.alloc(npages)
		if base != 0 {
			s = h.tryAllocMSpan()
			if s != nil {
				goto HaveSpan
			}
		}
	}

	lock(&h.lock)

  // other code
  ...

	if base == 0 {
		// Global allocator allocation page
		base, scav = h.pages.alloc(npages)
		if base == 0 {
      // If expansion fails, return nil
			if !h.grow(npages) {
				unlock(&h.lock)
				return nil
			}
      // Reacquire
			base, scav = h.pages.alloc(npages)
      // Fail again and throw exception
			if base == 0 {
				throw("grew heap, but no adequate free space found")
			}
		}
	}
	if s == nil {
		s = h.allocMSpanLocked()
	}

  // other code
  ...

	unlock(&h.lock)

HaveSpan:
	// At this point, both s != nil and base != 0, and the heap
	// lock is no longer held. Initialize the span.
	s.init(base, npages)
	if h.allocNeedsZero(base, npages) {
		s.needzero = 1
	}
	nbytes := npages * pageSize
	if typ.manual() {
		s.manualFreeList = 0
		s.nelems = 0
		s.limit = s.base() + s.npages*pageSize
		s.state.set(mSpanManual)
	} else {
		// We must set span properties before the span is published anywhere
		// since we're not holding the heap lock.
		s.spanclass = spanclass
		if sizeclass := spanclass.sizeclass(); sizeclass == 0 {
			s.elemsize = nbytes
			s.nelems = 1

			s.divShift = 0
			s.divMul = 0
			s.divShift2 = 0
			s.baseMask = 0
		} else {
			s.elemsize = uintptr(class_to_size[sizeclass])
			s.nelems = nbytes / s.elemsize

			m := &class_to_divmagic[sizeclass]
			s.divShift = m.shift
			s.divMul = m.mul
			s.divShift2 = m.shift2
			s.baseMask = m.baseMask
		}

		// Initialize mark and allocation structures.
		s.freeindex = 0
		s.allocCache = ^uint64(0) // all 1s indicating all free.
		s.gcmarkBits = newMarkBits(s.nelems)
		s.allocBits = newAllocBits(s.nelems)
	}

	// other code
	...


	h.setSpans(s.base(), npages, s)
  // other code
  ...
	return s
}

// Expansion
func (h *mheap) grow(npage uintptr) bool {
	assertLockHeld(&h.lock)

	// Calculate the total page memory required
	ask := alignUp(npage, pallocChunkPages) * pageSize

	totalGrowth := uintptr(0)
	end := h.curArena.base + ask
	nBase := alignUp(end, physPageSize)
	if nBase > h.curArena.end || /* overflow */ end < h.curArena.base {
    //The arena area is not enough and needs to be expanded
		av, asize := h.sysAlloc(ask)
		if av == nil {
			print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n")
			return false
		}

		if uintptr(av) == h.curArena.end {
      // the new region is contiguous with the current arena; just extend it
			h.curArena.end = uintptr(av) + asize
		} else {
      //new area 
			if size := h.curArena.end - h.curArena.base; size != 0 {
				h.pages.grow(h.curArena.base, size)
				totalGrowth += size
			}
			// Switch to the new space.
			h.curArena.base = uintptr(av)
			h.curArena.end = uintptr(av) + asize
		}
		atomic.Xadd64(&memstats.heap_released, int64(asize))
		stats := memstats.heapStats.acquire()
		atomic.Xaddint64(&stats.released, int64(asize))
		memstats.heapStats.release()
		nBase = alignUp(h.curArena.base+ask, physPageSize)
	}

	// update arena
	v := h.curArena.base
	h.curArena.base = nBase
  // update page allocator
	h.pages.grow(v, nBase-v)
	totalGrowth += nBase - v

  // scavenge: return memory pages to the OS if we exceed the scavenge goal
	if retained := heapRetained(); retained+uint64(totalGrowth) > h.scavengeGoal {
		todo := totalGrowth
		if overage := uintptr(retained + uint64(totalGrowth) - h.scavengeGoal); todo > overage {
			todo = overage
		}
		h.pages.scavenge(todo, false)
	}
	return true
}

memory allocation

// The new operation is translated by the compiler to call this method
func newobject(typ *_type) unsafe.Pointer {
	return mallocgc(typ.size, typ, true)
}
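
For illustration, any allocation the compiler decides must live on the heap is lowered to this path; in the hypothetical example below, the global sink forces the values to escape:

package main

type point struct{ x, y int }

var sink *point

//go:noinline
func newPoint() *point {
	// The returned pointer escapes, so the compiler lowers this allocation
	// to runtime.newobject (and hence runtime.mallocgc).
	return &point{x: 1, y: 2}
}

func main() {
	sink = newPoint()
	sink = new(point) // new(T) that escapes is lowered the same way
}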

// mallocgc allocates an object of size bytes: small objects come from the
// per-P cache's free lists, large objects (> 32KB) directly from the heap
func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	// other code
  ...
	mp := acquirem()  // pin the current M so it is not preempted during allocation
  // other code
  ...
	mp.mallocing = 1
	shouldhelpgc := false
  // 1. Compute the size of the allocation
	dataSize := size

  // 2. Find an appropriately sized block in the mcache
	c := getMCache() // get the current P's mcache
  // other code
  ...
	var span *mspan
	var x unsafe.Pointer
	noscan := typ == nil || typ.ptrdata == 0
	if size <= maxSmallSize {
		if noscan && size < maxTinySize {
      // 2.1 Tiny object allocation
      // 1. Try the tiny (bump) allocator first
      // 2. Otherwise fall back to mcache/mcentral/mheap
			off := c.tinyoffset
			// perform memory alignment
			if size&7 == 0 {
				off = alignUp(off, 8)
			} else if sys.PtrSize == 4 && size == 12 {
				off = alignUp(off, 8)
			} else if size&3 == 0 {
				off = alignUp(off, 4)
			} else if size&1 == 0 {
				off = alignUp(off, 2)
			}
      // allocate from the current tiny block if the object fits
			if off+size <= maxTinySize && c.tiny != 0 {
				x = unsafe.Pointer(c.tiny + off)
				c.tinyoffset = off + size
				c.tinyAllocs++
				mp.mallocing = 0
				releasem(mp)
				return x
			}
			//If not, allocate through span
			span = c.alloc[tinySpanClass]
			v := nextFreeFast(span)
			if v == 0 {
				v, span, shouldhelpgc = c.nextFree(tinySpanClass)
			}
      // return new memory
			x = unsafe.Pointer(v)
			(*[2]uint64)(x)[0] = 0
			(*[2]uint64)(x)[1] = 0
			if size < c.tinyoffset || c.tiny == 0 {
				c.tiny = uintptr(x)
				c.tinyoffset = size
			}
			size = maxTinySize
		} else {
      // 2.2 Small object allocation
      // served from mcache/mcentral/mheap
			var sizeclass uint8
			if size <= smallSizeMax-8 {
				sizeclass = size_to_class8[divRoundUp(size, smallSizeDiv)]
			} else {
				sizeclass = size_to_class128[divRoundUp(size-smallSizeMax, largeSizeDiv)]
			}
			size = uintptr(class_to_size[sizeclass])
			spc := makeSpanClass(sizeclass, noscan)
			span = c.alloc[spc]
			v := nextFreeFast(span) // Check if the management has free space
			if v == 0 {
        // slow path: fetch a span with free space via mcache/mcentral/mheap
				v, span, shouldhelpgc = c.nextFree(spc)
			}
			x = unsafe.Pointer(v)
			if needzero && span.needzero != 0 {
				memclrNoHeapPointers(unsafe.Pointer(v), size)
			}
		}
	} else {
    // 2.3 Large object allocation
    // allocated directly from the heap (mheap)
		shouldhelpgc = true
    // request a span with size class 0 from mheap
		span = c.allocLarge(size, needzero, noscan)
		span.freeindex = 1
		span.allocCount = 1
		x = unsafe.Pointer(span.base())
		size = span.elemsize
	}

	// other code
  ...
  
	// memory barrier
	publicationBarrier()

	// other code
  ...
	mp.mallocing = 0
	releasem(mp)
	// other code
  ...
  if shouldhelpgc {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}

	return x
}
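
With the 16-byte tiny blocks above, for example, two consecutive 8-byte noscan allocations share a single block: the first returns offset 0 and sets tinyoffset to 8, the second returns offset 8 and fills the block, so both allocations are served by one slot of the tiny span.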

