Created
October 16, 2017 15:33
-
-
Save pdarragh/90c9de2aa659a0ddd87725f2c9f7773a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3.6 | |
import random | |
from abc import ABC, abstractmethod | |
from copy import copy | |
from typing import Any, Callable, Dict, Iterable, List, NamedTuple, Optional, Tuple, Type | |
CROSS_VALIDATION_TRAINING_EPOCHS = 10 | |
DEVELOPMENT_SET_TRAINING_EPOCHS = 20 | |
VERBOSE = False | |
class Vector: | |
def __init__(self): | |
self._vals = [] | |
def __copy__(self): | |
v = Vector() | |
v._vals = copy(self._vals) | |
return v | |
def __str__(self): | |
return f"< {', '.join(map(str, self._vals))} >" | |
def __repr__(self): | |
return str(self) | |
def __len__(self): | |
return len(self._vals) | |
def __iter__(self): | |
return iter(self._vals) | |
def __getitem__(self, item): | |
return self._vals[item] | |
def __setitem__(self, key, value): | |
self._vals[key] = value | |
def __add__(self, other): | |
if other is None or not isinstance(other, Vector): | |
return NotImplemented | |
if len(other) != len(self): | |
raise ValueError("cannot add vectors of different lengths") | |
new_vector = Vector() | |
for i in range(len(self)): | |
new_vector.append(self[i] + other[i]) | |
return new_vector | |
def __radd__(self, other): | |
return self.__add__(other) | |
def __iadd__(self, other): | |
if not isinstance(other, Vector): | |
return NotImplemented | |
if len(other) != len(self): | |
raise ValueError("cannot add vectors of different lengths") | |
for i in range(len(self)): | |
self[i] = self[i] + other[i] | |
return self | |
def __mul__(self, other): | |
if isinstance(other, Vector): | |
if len(other) != len(self): | |
raise ValueError("cannot multiply vectors of different lengths") | |
dot_product = 0 | |
for i in range(len(self)): | |
dot_product += self[i] * other[i] | |
return dot_product | |
elif isinstance(other, float) or isinstance(other, int): | |
scaled_vector = Vector() | |
for v in self: | |
scaled_vector.append(v * other) | |
return scaled_vector | |
else: | |
return NotImplemented | |
def __rmul__(self, other): | |
return self.__mul__(other) | |
def __imul__(self, other): | |
if isinstance(other, float) or isinstance(other, int): | |
for i in range(len(self)): | |
self[i] = self[i] * other | |
return self | |
else: | |
return NotImplemented | |
def append(self, item): | |
self._vals.append(item) | |
DataTuple = NamedTuple('DataItem', [('label', int), ('vector', Vector)]) | |
Data = List[DataTuple] | |
LearningCurvePoint = NamedTuple('LearningCurvePoint', [('epoch', int), ('accuracy', float)]) | |
LearningCurveData = List[LearningCurvePoint] | |
TestResult = NamedTuple('PerceptronResult', [('best_margin', float), ('best_learning_rate', float), ('updates', int), | |
('kcv_accuracy', float), ('dev_accuracy', float), | |
('test_accuracy', float), ('learning_curve', LearningCurveData)]) | |
def _small_random_value(low=-0.1, high=0.1) -> float: | |
return random.uniform(low, high) | |
class Perceptron(ABC): | |
@abstractmethod | |
def reset_tests(self): | |
pass | |
@abstractmethod | |
def train(self, dt: DataTuple): | |
pass | |
@abstractmethod | |
def test(self, dt: DataTuple): | |
pass | |
@abstractmethod | |
def print_stats(self): | |
pass | |
@abstractmethod | |
def print_results(self): | |
pass | |
@property | |
@abstractmethod | |
def n(self) -> float: | |
pass | |
@property | |
@abstractmethod | |
def updates(self) -> int: | |
pass | |
@updates.setter | |
def updates(self, value: int): | |
pass | |
@property | |
@abstractmethod | |
def precision(self) -> float: | |
pass | |
@property | |
@abstractmethod | |
def recall(self) -> float: | |
pass | |
@property | |
@abstractmethod | |
def accuracy(self) -> float: | |
pass | |
class SimplePerceptron(Perceptron): | |
def __init__(self, learning_rate: float, vector_length: int, **_): | |
self._n = learning_rate | |
self.u = 0 | |
self.b = _small_random_value() | |
self._updates = 0 | |
self.examples = 0 | |
self.vector_length = vector_length | |
self.w = self._initialize_vector(_small_random_value) | |
self.tp = 0 # true positives | |
self.tn = 0 # true negatives | |
self.fp = 0 # false positives | |
self.fn = 0 # false negatives | |
def reset_tests(self): | |
self.tp = 0 | |
self.tn = 0 | |
self.fp = 0 | |
self.fn = 0 | |
def _copy_help(self, perceptron): | |
perceptron.u = self.u | |
perceptron.b = self.b | |
perceptron.updates = self._updates | |
perceptron.examples = self.examples | |
perceptron.w = copy(self.w) | |
perceptron.tp = self.tp | |
perceptron.tn = self.tn | |
perceptron.fp = self.fp | |
perceptron.fn = self.fn | |
def __copy__(self): | |
p = type(self)(self._n, self.vector_length) | |
self._copy_help(p) | |
return p | |
def _initialize_vector(self, init_func: Callable[[], float]) -> Vector: | |
v = Vector() | |
for _ in range(self.vector_length): | |
v.append(init_func()) | |
return v | |
def _extract_dt(self, dt: DataTuple): | |
y = dt.label | |
x = dt.vector | |
wx = self.w * x | |
t = y * (wx + self.b) | |
return t, y, x | |
def train(self, dt: DataTuple): | |
self.examples += 1 | |
t, y, x = self._extract_dt(dt) | |
if t < self.u: | |
# Perform an update according to: | |
# w <- w + nyx | |
# b <- b + ny | |
ny = self.n * y | |
nyx = ny * x | |
self.w = self.w + nyx | |
self.b += ny | |
self.updates += 1 | |
def test(self, dt: DataTuple): | |
y = dt.label | |
x = dt.vector | |
while len(x) < self.vector_length: | |
x.append(0) | |
wx = self.w * x | |
t = -1 if wx + self.b < 0 else 1 | |
self._count_test_value(t, y) | |
def _count_test_value(self, guess: int, actual: int): | |
if guess == actual == 1: | |
self.tp += 1 | |
elif guess == actual == -1: | |
self.tn += 1 | |
elif guess == 1 and actual == -1: | |
self.fp += 1 | |
elif guess == -1 and actual == 1: | |
self.fn += 1 | |
else: | |
raise ValueError(f"invalid test value: {guess}") | |
def print_stats(self): | |
print() | |
print(f"precision: {self.precision * 100 : 0.2f}% ({self.precision})") | |
print(f"recall: {self.recall * 100 : 0.2f}% ({self.recall})") | |
print(f"accuracy: {self.accuracy * 100 : 0.2f}% ({self.accuracy})") | |
print() | |
print(f"true positives: {self.tp}") | |
print(f"true negatives: {self.tn}") | |
print(f"false positives: {self.fp}") | |
print(f"false negatives: {self.fn}") | |
print() | |
print(f"total test examples: {self.tp + self.tn + self.fp + self.fn}") | |
print(f"total train examples: {self.examples}") | |
print(f"total updates performed: {self.updates}") | |
def print_results(self): | |
print() | |
print(f"learning rate: {self.n}") | |
self.print_stats() | |
@property | |
def n(self): | |
return self._n | |
@property | |
def updates(self): | |
return self._updates | |
@updates.setter | |
def updates(self, value: int): | |
self._updates = value | |
@property | |
def precision(self) -> float: | |
return self.tp / (self.tp + self.fp) | |
@property | |
def recall(self) -> float: | |
return self.tp / (self.tp + self.fn) | |
@property | |
def accuracy(self) -> float: | |
sum_total = (self.tp + self.tn + self.fp + self.fn) | |
if sum_total == 0: | |
raise RuntimeError("no testing was performed on the perceptron") | |
return (self.tp + self.tn) / sum_total | |
class DynamicPerceptron(SimplePerceptron): | |
def __init__(self, learning_rate: float, vector_length: int, **_): | |
super().__init__(learning_rate, vector_length) | |
self.time = 0 | |
def __copy__(self): | |
p = super().__copy__() | |
p.time = self.time | |
return p | |
def train(self, dt: DataTuple): | |
super().train(dt) | |
self.time += 1 | |
def print_results(self): | |
print() | |
print(f"initial learning rate: {self._n}") | |
print(f" final learning rate: {self.n}") | |
print(f" time steps taken: {self.time}") | |
self.print_stats() | |
@property | |
def n(self): | |
return self._n / (self.time + 1) | |
class MarginPerceptron(DynamicPerceptron): | |
def __init__(self, learning_rate: float, margin: float, vector_length: int, **_): | |
super().__init__(learning_rate, vector_length) | |
self.u = margin | |
def __copy__(self): | |
p = type(self)(self._n, self.u, self.vector_length) | |
self._copy_help(p) | |
return p | |
def print_results(self): | |
print() | |
print(f"margin: {self.u}") | |
super().print_results() | |
class AveragedPerceptron(SimplePerceptron): | |
def __init__(self, learning_rate: float, vector_length: int, **_): | |
super().__init__(learning_rate, vector_length) | |
self.a = self._initialize_vector(lambda: 0) | |
self.ba = 0 | |
def __copy__(self): | |
p = super().__copy__() | |
p.a = copy(self.a) | |
p.ba = self.ba | |
return p | |
def train(self, dt: DataTuple): | |
super().train(dt) | |
self.a += self.w | |
self.ba += self.b | |
def test(self, dt: DataTuple): | |
y = dt.label | |
x = dt.vector | |
while len(x) < self.vector_length: | |
x.append(0) | |
ax = self.a * x | |
t = -1 if ax + self.ba < 0 else 1 | |
self._count_test_value(t, y) | |
class AggressiveMarginPerceptron(MarginPerceptron): | |
def __init__(self, margin: float, vector_length: int, **_): | |
super().__init__(0, margin, vector_length) | |
def __copy__(self): | |
p = type(self)(self.u, self.vector_length) | |
self._copy_help(p) | |
return p | |
def train(self, dt: DataTuple): | |
self.examples += 1 | |
y = dt.label | |
x = dt.vector | |
wx = self.w * x | |
ywx = y * wx | |
if ywx <= self.u: | |
top = self.u - (y * wx) | |
bottom = (x * x) + 1 | |
n = top / bottom | |
nyx = (n * y) * x | |
self.w += nyx | |
self.updates += 1 | |
def test_perceptron(perceptron: Perceptron, data: Data): | |
for dt in data: | |
perceptron.test(dt) | |
def cross_validate(perceptron_class: Type[Perceptron], perceptron_args: Dict[str, Any], training_data: List[Data], | |
epochs=CROSS_VALIDATION_TRAINING_EPOCHS) -> float: | |
k = len(training_data) | |
ps = [] | |
for i in range(k): | |
p = perceptron_class(**perceptron_args) | |
# Put all the examples together so they can be shuffled for proper training. | |
accumulated_training_data = [dt for j in range(k) for dt in training_data[j] if j != i] | |
for j in range(epochs): | |
if VERBOSE: | |
print(f" fold: {i}, epoch: {j}") | |
# For each epoch, shuffle the examples and train against them. | |
random.shuffle(accumulated_training_data) | |
for dt in accumulated_training_data: | |
p.train(dt) | |
# Test the perceptron's performance and save it for later analysis. | |
test_perceptron(p, training_data[i]) | |
ps.append(p) | |
# Calculate the average accuracy for establishing the performance of the hyper-parameters. | |
avg_accuracy = sum(p.accuracy for p in ps) / len(ps) | |
return avg_accuracy | |
def k_fold_cross_validate(perceptron_class: Type[Perceptron], training_data: List[Data], vector_length: int, | |
learning_rates: List[float], margins: List[float],) -> Tuple[float, float, float]: | |
k_fold_results = [] | |
for margin in margins: | |
for learning_rate in learning_rates: | |
if VERBOSE: | |
print(f"Cross-validation on {perceptron_class.__name__}: u={margin}, n={learning_rate}") | |
p_args = { | |
'learning_rate': learning_rate, | |
'margin': margin, | |
'vector_length': vector_length, | |
} | |
avg_accuracy = cross_validate(perceptron_class, p_args, training_data) | |
tup = (margin, learning_rate, avg_accuracy) | |
k_fold_results.append(tup) | |
# Find the best combination of hyper-parameters. | |
best_margin, best_learning_rate, best_accuracy = max(k_fold_results, key=lambda t: t[2]) | |
if VERBOSE: | |
print(f" best u: {best_margin}, best n: {best_learning_rate}") | |
return best_margin, best_learning_rate, best_accuracy | |
def train_and_test_perceptron_with_cross_validation(perceptron_class: Type[Perceptron], training_data: [Data], | |
testing_data: Data, development_data: Data, vector_length: int, | |
learning_rates: List[float], margins: List[float], | |
epochs=DEVELOPMENT_SET_TRAINING_EPOCHS) -> TestResult: | |
# k-fold validation for each desired margin and learning rate | |
best_margin, best_learning_rate, best_accuracy = k_fold_cross_validate(perceptron_class, training_data, | |
vector_length, learning_rates, margins) | |
# Accumulate all training data into one set. | |
accumulated_training_data = [dt for i in range(len(training_data)) for dt in training_data[i]] | |
# Perform training multiple epochs for testing against the development set. | |
perceptron_args = { | |
'learning_rate': best_learning_rate, | |
'margin': best_margin, | |
'vector_length': vector_length, | |
} | |
p = perceptron_class(**perceptron_args) | |
if VERBOSE: | |
print(f"Development training on {perceptron_class.__name__}: u={best_margin}, n={best_learning_rate}") | |
development_results: List[Tuple[int, Perceptron]] = [] | |
learning_curve: LearningCurveData = [] | |
for i in range(epochs): | |
if VERBOSE: | |
print(f" epoch: {i}") | |
random.shuffle(accumulated_training_data) | |
for dt in accumulated_training_data: | |
p.train(dt) | |
# Compute accuracy against the development set. | |
test_perceptron(p, development_data) | |
# Keep a copy of the perceptron at this point. | |
epoch_pair = (i, copy(p)) | |
development_results.append(epoch_pair) | |
# Record the accuracy for plotting the learning curve. | |
curve_point = LearningCurvePoint(i, p.accuracy) | |
learning_curve.append(curve_point) | |
# Reset the testing results of the current perceptron (to not taint the performance measurements). | |
p.reset_tests() | |
# Pick the best perceptron. | |
epoch, best_perceptron = max(development_results, key=lambda x: x[1].accuracy) | |
if VERBOSE: | |
print(f"Testing on {perceptron_class.__name__} using classifier from epoch {epoch}") | |
# Record the accuracy from the development set's test. | |
development_accuracy = best_perceptron.accuracy | |
# Test using the test set. | |
best_perceptron.reset_tests() | |
test_perceptron(best_perceptron, testing_data) | |
# Return the perceptron for later analysis. | |
return TestResult(best_margin, best_learning_rate, best_perceptron.updates, best_accuracy, development_accuracy, | |
best_perceptron.accuracy, learning_curve) | |
def run_perceptron(perceptron_class: Type[Perceptron], training_data: [Data], testing_data: Data, | |
development_data: Data, vector_length: int, learning_rates: List[float], margins: List[float] | |
) -> TestResult: | |
if not issubclass(perceptron_class, Perceptron): | |
raise ValueError(f"invalid perceptron class: {perceptron_class}") | |
return train_and_test_perceptron_with_cross_validation(perceptron_class, training_data, testing_data, | |
development_data, vector_length, learning_rates, margins) | |
PerceptronTest = NamedTuple('PerceptronTest', [('name', str), ('perceptron_class', Type[Perceptron]), | |
('learning_rates', List[Optional[float]]), | |
('margins', List[Optional[float]])]) | |
def run_tests(training_data: [Data], testing_data: Data, development_data: Data) -> Iterable[Tuple[str, TestResult]]: | |
# Prepare the various tests. | |
tests = [ | |
PerceptronTest('3.2.1. Simple Perceptron', SimplePerceptron, [1, 0.1, 0.01], [None]), | |
PerceptronTest('3.2.2. Dynamic Perceptron', DynamicPerceptron, [1, 0.1, 0.01], [None]), | |
PerceptronTest('3.2.3. Margin-Sensitive Perceptron', MarginPerceptron, [1, 0.1, 0.01], [1, 0.1, 0.01]), | |
PerceptronTest('3.2.4. Averaged Perceptron', AveragedPerceptron, [1, 0.1, 0.01], [None]), | |
PerceptronTest('3.2.5. Aggressive Margin-Sensitive Perceptron', AggressiveMarginPerceptron, | |
[None], [1, 0.1, 0.01]), | |
] | |
# Ensure all vectors are the same length from the beginning. | |
max_vector_length = 0 | |
for d in training_data: | |
vector_length = len(max(d, key=lambda x: len(x.vector)).vector) | |
max_vector_length = max(max_vector_length, vector_length) | |
for d in training_data + [testing_data]: | |
for dt in d: | |
while len(dt.vector) < max_vector_length: | |
dt.vector.append(0) | |
# Perform tests for each perceptron class. | |
for test in tests: | |
yield test.name, run_perceptron(test.perceptron_class, training_data, testing_data, development_data, | |
max_vector_length, test.learning_rates, test.margins) | |
def run_naive_test(data: Data, label: int) -> float: | |
total = 0 | |
correct = 0 | |
for dt in data: | |
total += 1 | |
if dt.label == label: | |
correct += 1 | |
return correct / total | |
def run_majority_baseline(training_data: [Data], testing_data: Data, development_data: Data | |
) -> Tuple[int, float, float]: | |
count = 0 | |
for d in training_data: | |
for dt in d: | |
count += dt.label | |
if count > 0: | |
label = 1 | |
elif count < 0: | |
label = -1 | |
else: | |
if random.randint(1, 2) == 1: | |
label = 1 | |
else: | |
label = -1 | |
dev_accuracy = run_naive_test(development_data, label) | |
test_accuracy = run_naive_test(testing_data, label) | |
return label, dev_accuracy, test_accuracy | |
def read_file(filename: str) -> Data: | |
data = [] | |
with open(filename) as f: | |
for line in f: | |
label, raw_sv = line.strip().split(maxsplit=1) | |
label = int(label) | |
vector = Vector() | |
for svv in raw_sv.strip().split(): | |
index, value = svv.split(':') | |
index = int(index) - 1 | |
value = float(value) | |
while len(vector) <= index: | |
vector.append(0) | |
vector[index] = value | |
pair = DataTuple(label, vector) | |
data.append(pair) | |
return data | |
def read_files(filenames: List[str]) -> List[Data]: | |
data_list = [] | |
for fn in filenames: | |
data_list.append(read_file(fn)) | |
return data_list | |
if __name__ == '__main__': | |
import argparse | |
parser = argparse.ArgumentParser() | |
parser.add_argument('testing_data') | |
parser.add_argument('development_data') | |
parser.add_argument('training_data', nargs='+') | |
parser.add_argument('-v', '--verbose', action='store_true') | |
args = parser.parse_args() | |
if args.verbose: | |
VERBOSE = True | |
try: | |
data_for_training = read_files(args.training_data) | |
data_for_testing = read_file(args.testing_data) | |
data_for_development = read_file(args.development_data) | |
mb_label, dev_acc, test_acc = run_majority_baseline(data_for_training, data_for_testing, data_for_development) | |
print(f"Majority Baseline") | |
print() | |
print(f" Baseline label: {mb_label}") | |
print(f" Development accuracy: {dev_acc * 100:0.2f}% ({dev_acc})") | |
print(f" Testing accuracy: {test_acc * 100:0.2f}% ({test_acc})") | |
for name, result in run_tests(data_for_training, data_for_testing, data_for_development): | |
print() | |
print(name) | |
print() | |
print(f" (a) Hyper-parameters chosen:") | |
print(f" margin: {result.best_margin if result.best_margin is not None else 'N/A'}") | |
print(f" learning rate: {result.best_learning_rate if result.best_learning_rate is not None else 'N/A'}") | |
print(f" (b) Cross-validation accuracy: {result.kcv_accuracy * 100:0.2f}% ({result.kcv_accuracy})") | |
print(f" (c) Updates performed: {result.updates}") | |
print(f" (d) Development set accuracy: {result.dev_accuracy * 100:0.2f}% ({result.dev_accuracy})") | |
print(f" (e) Test set accuracy: {result.test_accuracy * 100:0.2f}% ({result.test_accuracy})") | |
print(f" (f) Learning curve data:") | |
length = max(len(str(result.learning_curve[-1][0])), len("epoch")) + 1 | |
print(f" {'epoch':>{length}} | accuracy") | |
print(f" {'-' * length}-|-{'-' * 19}") | |
for point in result.learning_curve: | |
print(f" {point.epoch:>{length}} | {point.accuracy}") | |
except KeyboardInterrupt: | |
print() | |
print("Quitting...") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/******************************************************************************* | |
* mm.c | |
* | |
* The best dynamic memory allocation library you've ever seen (possibly). | |
* | |
* This solution utilizes a segregated free list implementation. The following | |
* fifteen size classes are used: | |
* 1. 1, 2 or 3 words (all blocks must be three words, so this is the smallest) | |
* 2. 4 | |
* 3. 5 | |
* 4. 6 | |
* 5. 7 | |
* 6. 8 -> 15 | |
* 7. 16 -> 31 | |
* 8. 32 -> 63 | |
* 9. 64 -> 127 | |
* 10. 128 -> 255 | |
* 11. 256 -> 511 | |
* 12. 512 -> 1023 | |
* 13. 1024 -> 2047 | |
* 14. 2048 -> 4095 | |
* 15. 4096+ | |
*/ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <assert.h> | |
#include <unistd.h> | |
#include <string.h> | |
#include "mm.h" | |
#include "memlib.h" | |
/******************************************************************************* | |
* NOTE TO STUDENTS: Before you do anything else, please provide your | |
* identifying information in the following struct. | |
*/ | |
team_t team = { | |
// Team name | |
"darragh", | |
// First member's full name | |
"Pierce Darragh", | |
// First member's UID | |
"darragh", | |
// Second member's full name (leave blank if none) | |
"", | |
// Second member's UID (leave blank if none) | |
"" | |
}; | |
/******************************************************************************* | |
* Preprocessor Directives | |
* | |
* These macros help to keep track of what's going on in the program. | |
*/ | |
/* Whether to verify blocks as we go. */ | |
#define VERIFY 0 | |
#define VERBOSE 0 | |
#define SHOW_SHORT_WORDS 1 | |
/* Single word (4) or double word (8) alignment. */ | |
#define WSIZE 4 | |
#define ALIGNMENT 8 | |
/* Minimum size for any block. | |
* It's based on the minimum things required to build a free block: | |
* 1. header | |
* 2. next element pointer | |
* 3. previous element pointer | |
* 4. footer | |
*/ | |
#define MIN_BLOCK_SIZE 4 | |
/* The amount by which to adjust heap alotment. */ | |
#define CHUNKSIZE (1 << 12) | |
/* Return the larger of two numbers. */ | |
#define MAX(x, y) ((x) > (y) ? (x) : (y)) | |
/* Pack a size and allocated bit into a word. */ | |
#define PACK(size, alloc) ((size) | (alloc)) | |
/* Read and write a word at address p. */ | |
#define GET(p) (*(unsigned int*)(p)) | |
#define PUT(p, val) (*(unsigned int*)(p) = (val)) | |
/* Read the size and allocated fields from address p. It is assumed that the | |
* pointer p points to a boundary tag. */ | |
#define SIZE_FROM_TAG(p) (GET(p) & ~0x7) | |
#define ALLOC_FROM_TAG(p) (GET(p) & 0x1) | |
/* Given a block pointer bp, compute the address of that block's header or | |
* footer information. */ | |
#define HEADER_FOR_BLOCK(bp) ((char*)(bp) - WSIZE) | |
#define FOOTER_FOR_BLOCK(bp) ((char*)(bp) + SIZE_FROM_TAG(HEADER_FOR_BLOCK(bp)) - (2 * WSIZE)) | |
#define FOOTER_FOR_PREV_BLOCK(bp) ((char*)HEADER_FOR_BLOCK(bp) - WSIZE) | |
/* Compute the address of the next or previous block from a given block | |
* pointer. Note that a previous block's address can only be gotten if it is a | |
* free block, as this implementation only stores footers for free blocks. */ | |
#define NEXT_BLOCK(bp) ((char*)(bp) + SIZE_FROM_TAG(HEADER_FOR_BLOCK(bp))) | |
#define PREV_BLOCK(bp) ((char*)(bp) - SIZE_FROM_TAG((char*)(bp) - (2 * WSIZE))) | |
/* Pointers to the prologue and epilogue headers. */ | |
#define PROLOGUE_HEADER heap_listp; | |
#define EPILOGUE_HEADER (char*)((int)mem_heap_hi() / 4 * 4) | |
/* Maximum number of size classes. */ | |
#define NUM_SIZE_CLASSES 15 | |
/* Accessing the next or previous element of a free list from a given block. */ | |
#define NEXT_LIST_ELEMENT(bp) GET(bp) | |
#define PREV_LIST_ELEMENT(bp) GET(bp + WSIZE) | |
/* Set the next and previous elements of a list node. */ | |
#define SET_NEXT_LIST_ELEMENT(bp, next) PUT(bp, (unsigned int)next) | |
#define SET_PREV_LIST_ELEMENT(bp, prev) PUT(bp + WSIZE, (unsigned int)prev) | |
/* Acessing the head of a free list given a specific size class. | |
* This is just some simple arithmetic. The size class pointers are actually | |
* stored in a descending order (largest class first in the prologue). This is | |
* because the arithmetic is done relative to the heap list pointer - which | |
* comes *after* the prologue header. */ | |
#define SIZE_CLASS_HEAD(size_class) (heap_listp - ((size_class * WSIZE) + WSIZE)) | |
/******************************************************************************* | |
* Global Variables | |
* | |
* The assignment specifies that there may not be any global data structures, | |
* but global scalar variables such as integers and pointers are perfectly | |
* legal. | |
*/ | |
static char* heap_listp = 0; | |
/******************************************************************************* | |
* Forward Declarations | |
* | |
* Methods which are defined only within this file are forward-declared here. | |
*/ | |
// Helper methods. | |
static void* extend_heap(size_t words); | |
static size_t get_adjusted_size(size_t size); | |
static void* coalesce_and_integrate(void* bp); | |
static void integrate(void* bp); | |
static void* coalesce(void* bp); | |
static void place(void* bp, size_t size); | |
static void remove_block_from_free_list(void* bp); | |
static void* find_fit(size_t size); | |
static void pack_tag(void* ptr, size_t size, int alloc); | |
static int size_class_for_size(size_t size); | |
static void* first_element_from_size_class(int size_class); | |
static void* size_class_pointer_for_size(size_t size); | |
// Printout methods. | |
static void print_address(void* ptr); | |
static int short_address(void* ptr); | |
static int short_word_address(void* ptr); | |
static void printblock(void* bp); | |
static int checkblock(void* bp); | |
static int checkheap(int verbose); | |
/******************************************************************************* | |
* Core Implementation | |
* | |
* These are the methods that define this team's core malloc implementation, | |
* specifically: | |
* - mm_init initializes the heap and headers | |
* - mm_malloc allocates memory | |
* - mm_free frees memory | |
* - mm_realloc reallocates a pointer for further use | |
*/ | |
/**** | |
* mm_init | |
* | |
* Initialize the malloc package. | |
* | |
* Since this version implements a segregated free list, I wanted an easy, | |
* reliable method of keeping track of where all the different free lists exist. | |
* The simplest solution seemed to be to place pointers into the header. Thus | |
* the heap has the following shape: | |
* | |
* Padding Epilogue header | |
* Prologue header | | | |
* | | Blocks in memory | | |
* | | | | | |
* +---+---+---+---+---+---+---+-|-+-|-+--- ---|---------------+-|-+ | |
* | L | * * * * * * * * * | S | P | # | ... | E | | |
* +-|-+---+---+---+---+---+-|-+---*---+--- -------------------+---+ | |
* | \-------+-------/ | | | |
* | | | heap_listp | |
* | Pointers to other | | |
* | free lists | | |
* | | | |
* Largest free list pointer | | |
* Smallest free list pointer | |
* | |
* Each class size has its own singly-linked free list, with the heads of each | |
* list existing in the header. | |
* | |
* There are fifteen size classes, and each needs a word to store its pointer. | |
* This means that the initial heap -- without any usable space contained within | |
* -- is 18 words long. | |
* | |
* Note that the size class pointers are stored in *descending* order. This | |
* simplifies the arithmetic greatly. | |
*/ | |
int | |
mm_init(void) | |
{ | |
int i; | |
if (VERBOSE) | |
{ | |
// Print a nice header so we know where we're starting. | |
printf("--------------------------------------------------------------------------------\n"); | |
printf("\n"); | |
} | |
// Create the initial empty heap. | |
if ((heap_listp = mem_sbrk(18 * WSIZE)) == (void*)-1) | |
{ | |
// There was a problem. | |
return -1; | |
} | |
// Initialize the various tags. | |
for (i = 0; i < 15; ++i) | |
{ | |
// First initialize all of the list pointers. This shouldn't actually | |
// accomplish much, but I want to be explicit about generating the | |
// header segment. | |
pack_tag(heap_listp + (i * WSIZE), 0, 0); | |
} | |
pack_tag(heap_listp + (15 * WSIZE), 2 * WSIZE, 1); // Prologue header. | |
pack_tag(heap_listp + (16 * WSIZE), 2 * WSIZE, 1); // Prologue footer. | |
pack_tag(heap_listp + (17 * WSIZE), 0, 1); // Epilogue header. | |
// Move the pointer ahead to the beginning of the usable heap. | |
heap_listp += (16 * WSIZE); | |
// Verify. | |
if (VERIFY) | |
{ | |
mm_check(); | |
} | |
// Extend the empty heap with a free block of CHUNKSIZE bytes. | |
if (extend_heap(CHUNKSIZE / WSIZE) == NULL) | |
{ | |
// There was a problem. | |
return -1; | |
} | |
// Verify. | |
if (VERIFY) | |
{ | |
mm_check(); | |
} | |
// Success! | |
return 0; | |
} | |
/**** | |
* mm_malloc | |
* | |
* Allocate a block by incrementing the brk pointer. Always allocate a block | |
* whose size is a multiple of the alignment. | |
*/ | |
void* | |
mm_malloc(size_t size) | |
{ | |
if (VERBOSE) | |
{ | |
printf("mm_malloc %zu bytes (%zu words)\n", size, size / WSIZE); | |
} | |
size_t adjusted_size; | |
size_t extendsize; | |
char* bp; | |
// Ensure the heap has been initialized. | |
if (heap_listp == 0) | |
{ | |
mm_init(); | |
} | |
// Ignore requests to allocate zero bytes. | |
if (size == 0) | |
{ | |
return NULL; | |
} | |
// Get the adjusted size. This includes padding and headers and footers. | |
adjusted_size = get_adjusted_size(size); | |
// Search the free list for a fit. | |
if ((bp = find_fit(adjusted_size)) == NULL) | |
{ | |
// No fit found. Get more memory and then do the placement. | |
extendsize = MAX(adjusted_size, CHUNKSIZE); | |
if ((bp = extend_heap(extendsize / WSIZE)) == NULL) | |
{ | |
// Something went wrong. | |
return NULL; | |
} | |
} | |
place(bp, adjusted_size); | |
if (VERIFY) | |
{ | |
mm_check(); | |
} | |
return bp; | |
} | |
/**** | |
* mm_free | |
* | |
* Frees a block for reuse. | |
*/ | |
void | |
mm_free(void* bp) | |
{ | |
if (VERBOSE) | |
{ | |
printf("mm_free: "); | |
print_address(bp); | |
printf("\n"); | |
} | |
// Ensure the pointer points to something. | |
if (bp == 0) | |
{ | |
return; | |
} | |
// Ensure the heap has been initialized. | |
if (heap_listp == 0) | |
{ | |
mm_init(); | |
} | |
// Ensure the pointer is within the scope of the heap. | |
if (bp < (void*)heap_listp || bp > (void*)EPILOGUE_HEADER) | |
{ | |
return; | |
} | |
// If the block is free... don't do anything. | |
if (!ALLOC_FROM_TAG(HEADER_FOR_BLOCK(bp))) | |
{ | |
return; | |
} | |
// Get the size of this block. | |
size_t size = SIZE_FROM_TAG(HEADER_FOR_BLOCK(bp)); | |
// Update the header and footer for this block. | |
pack_tag(HEADER_FOR_BLOCK(bp), size, 0); | |
pack_tag(FOOTER_FOR_BLOCK(bp), size, 0); | |
// Add the newly-freed block to a list. | |
bp = coalesce_and_integrate(bp); | |
// Verify. | |
if (VERIFY) | |
{ | |
mm_check(); | |
} | |
} | |
/**** | |
* mm_realloc | |
* | |
* Implemented simply in terms of mm_malloc and mm_free | |
*/ | |
void* | |
mm_realloc( | |
void* bp, | |
size_t size ) | |
{ | |
if (VERBOSE) | |
{ | |
printf("mm_realloc: "); | |
print_address(bp); | |
printf("\n"); | |
printf(" %zu bytes (%zu words)\n", size, size / WSIZE); | |
} | |
void* new_block; | |
void* next_block; | |
size_t old_size = SIZE_FROM_TAG(HEADER_FOR_BLOCK(bp)); | |
size_t adjusted_size = get_adjusted_size(size); | |
// If the size is zero, then this is really just 'free'. | |
if (size == 0) | |
{ | |
if (VERBOSE) | |
{ | |
printf(" simple free\n"); | |
} | |
mm_free(bp); | |
return 0; | |
} | |
// If the pointer is null, then this is really 'malloc'. | |
if (bp == NULL) | |
{ | |
if (VERBOSE) | |
{ | |
printf(" simple malloc\n"); | |
} | |
return mm_malloc(size); | |
} | |
// Check if the reallocated size is less than (or equal to) the existing | |
// size. If it is, simply split the current block (if needed). | |
if (adjusted_size <= old_size) | |
{ | |
if (VERBOSE) | |
{ | |
printf(" old size usable\n"); | |
} | |
// Split away! Be sure to use the adjusted size. | |
place(bp, adjusted_size); | |
return bp; | |
} | |
// We don't have enough space in the current block. But what if the next | |
// block is free, and combined they could be large enough? | |
next_block = NEXT_BLOCK(bp); | |
if (!ALLOC_FROM_TAG(HEADER_FOR_BLOCK(next_block))) | |
{ | |
// It's free. Let's see if it's big enough. | |
if (old_size + SIZE_FROM_TAG(HEADER_FOR_BLOCK(next_block)) >= size) | |
{ | |
// It is! Let's combine them then. | |
if (VERBOSE) | |
{ | |
printf(" next block can be accumulated\n"); | |
} | |
// Remove the next block from its free list. | |
remove_block_from_free_list(next_block); | |
// Then adjust the headers of the current block to include the next | |
// block. This is a trick for 'place'. | |
pack_tag(HEADER_FOR_BLOCK(bp), old_size + SIZE_FROM_TAG(HEADER_FOR_BLOCK(next_block)), 1); | |
// Now "place" the new block. This will just create a new split | |
// between the now-larger block and the old next block. Be sure to | |
// use the adjusted size. | |
place(bp, adjusted_size); | |
return bp; | |
} | |
} | |
if (VERBOSE) | |
{ | |
printf(" allocating new block\n"); | |
} | |
// We need to allocate a new block and move the data. Let's start by | |
// creating a new allocated block. | |
new_block = mm_malloc(size); | |
// Ensure that the block was allocated properly. | |
if (!new_block) | |
{ | |
return 0; | |
} | |
if (VERBOSE) | |
{ | |
printf(" copying data: memcpy("); | |
print_address(new_block); | |
printf(", "); | |
print_address(bp); | |
printf(", %zu bytes (%zu words))\n", old_size, old_size / WSIZE); | |
} | |
// Copy the old data to the new location. | |
memcpy(new_block, bp, old_size); | |
if (VERBOSE) | |
{ | |
printf(" freeing old block: "); | |
print_address(bp); | |
printf("\n"); | |
} | |
// Free the old block. | |
mm_free(bp); | |
// Return the pointer to the new location. | |
return new_block; | |
} | |
/******************************************************************************* | |
* Helper Methods | |
* | |
* These methods are used by the Core Implementation methods to achieve their | |
* goals. | |
* - extend_heap Grows the heap. | |
* - get_adjusted_size Computes an adjusted size, which | |
* accounts for header, footer, & padding. | |
* - coalesce_and_integrate Handles coalescing and integration. | |
* - coalesce Combines neighboring free blocks. | |
* - integrate Adds a free block to a free list. | |
* - place Puts information into a block (and can | |
* then split the block as appropriate). | |
* - find_fit Finds the best block for a given size. | |
* - pack_tag Puts information into a tag. | |
* - size_class_for_size Returns an integer describing the size | |
* class for a given size (in bytes). | |
* - first_element_from_size_class Gets a pointer to the first element. | |
* - size_class_pointer_for_size Gets a pointer to the head of a list for | |
* a given size (in bytes). | |
*/ | |
/**** | |
* extend_heap | |
* | |
* Handles the adjustment of the heap whenever more space is needed. | |
*/ | |
static void* | |
extend_heap(size_t words) | |
{ | |
if (VERBOSE) | |
{ | |
printf("extend_heap %zu bytes (%zu words)\n", words * WSIZE, words); | |
} | |
char* bp; | |
size_t size; // The number of bytes to extend by. | |
// Multiply the number of words into a number of bytes - that's what the | |
// mem_sbrk function needs. | |
size = words * WSIZE; | |
// Generate the free block of spzce. | |
if ((long)(bp = mem_sbrk(size)) == -1) | |
{ | |
// There was a problem. | |
return NULL; | |
} | |
// Initialize free block header and footer and the epilogue header. | |
pack_tag(HEADER_FOR_BLOCK(bp), size, 0); // Free block header. | |
pack_tag(FOOTER_FOR_BLOCK(bp), size, 0); | |
pack_tag(HEADER_FOR_BLOCK(NEXT_BLOCK(bp)), 0, 1); // New epilogue header. | |
// Integrate the new block. | |
bp = coalesce_and_integrate(bp); | |
// And give back the new pointer. | |
return bp; | |
} | |
static size_t | |
get_adjusted_size(size_t size) | |
{ | |
size_t adjusted_size; | |
// Adjust block size to include overhead and alignment requirements. | |
if (size <= ((MIN_BLOCK_SIZE - 2) * WSIZE)) | |
{ | |
// Any block must be at least four words long. This is because a free | |
// block must be able to store pointers to both the next and previous | |
// block in the same free list, and there is a footer. | |
adjusted_size = MIN_BLOCK_SIZE * WSIZE; | |
} | |
else | |
{ | |
// Interior area. | |
adjusted_size = size; | |
if (VERBOSE) | |
{ | |
printf(" interior: %zu bytes (%zu words)\n", adjusted_size, adjusted_size / WSIZE); | |
} | |
// Every block has a header... | |
adjusted_size += WSIZE; | |
if (VERBOSE) | |
{ | |
printf(" + header: %zu bytes (%zu words)\n", adjusted_size, adjusted_size / WSIZE); | |
} | |
// And every block has a footer. | |
adjusted_size += WSIZE; | |
if (VERBOSE) | |
{ | |
printf(" + footer: %zu bytes (%zu words)\n", adjusted_size, adjusted_size / WSIZE); | |
} | |
} | |
// Add a small pad to ensure integer division completes as expected, and | |
// then multiply back to get the proper number of bytes. The division | |
// is done with double-words to ensure good block boundaries. | |
adjusted_size += (ALIGNMENT - 1); | |
adjusted_size /= ALIGNMENT; | |
adjusted_size *= ALIGNMENT; | |
if (VERBOSE) | |
{ | |
printf(" adjusted: %zu bytes (%zu words)\n", adjusted_size, adjusted_size / WSIZE); | |
} | |
return adjusted_size; | |
} | |
/**** | |
* coalesce_and_integrate | |
* | |
* Given a block pointer, this method first coalesces that block to ensure large | |
* contiguous free blocks (less fragmentation), and then integrates that new | |
* block into the appropriate free list. | |
*/ | |
static void* | |
coalesce_and_integrate(void* bp) | |
{ | |
bp = coalesce(bp); | |
integrate(bp); | |
return bp; | |
} | |
/**** | |
* coalesce | |
* | |
* Ensures contiguous free blocks where applicable. Takes a pointer to a free | |
* block of memory and attempts to combine it with its surrounding blocks if it | |
* can. The return value is a pointer to the free block containing the original | |
* just-freed block. This method will also remove other blocks from their free | |
* lists as needed during assimilation. BUT IT WILL NOT ADD ANYTHING TO A FREE | |
* LIST. | |
* | |
* Cases: | |
* - Neither the previous nor next block are free. | |
* - Previous block is not free but the next block is. | |
* - Previous block is free but the next block is not. | |
* - Both surrounding blocks are free. | |
* | |
* Assumptions: | |
* - Coalescing happens immediately after alotment. | |
* - No free block can be adjacent to a free block; they must be coalesced. | |
* - Somebody else is taking care of integration into free lists. | |
*/ | |
static void* | |
coalesce(void* bp) | |
{ | |
size_t alloc = ALLOC_FROM_TAG(HEADER_FOR_BLOCK(bp)); | |
size_t prev_alloc = ALLOC_FROM_TAG(FOOTER_FOR_PREV_BLOCK(bp)); | |
size_t next_alloc = ALLOC_FROM_TAG(HEADER_FOR_BLOCK(NEXT_BLOCK(bp))); | |
size_t size = SIZE_FROM_TAG(HEADER_FOR_BLOCK(bp)); | |
// Be certain that the block is unallocated. If it's not, just abort. | |
if (alloc) | |
{ | |
return bp; | |
} | |
if (VERBOSE) | |
{ | |
printf("coalesce: "); | |
print_address(bp); | |
printf("\n"); | |
printf(" prev_alloc: %zu\n", prev_alloc); | |
printf(" next_alloc: %zu\n", next_alloc); | |
} | |
if (prev_alloc && next_alloc) | |
{ | |
// Both surrounding blocks are allocated. Nothing to do. | |
return bp; | |
} | |
else if (prev_alloc && !next_alloc) | |
{ | |
// The previous block is allocated, but the next block is free. | |
// Remove the next block from its free list. | |
remove_block_from_free_list(NEXT_BLOCK(bp)); | |
// Combine this block with the next block. | |
size += SIZE_FROM_TAG(HEADER_FOR_BLOCK(NEXT_BLOCK(bp))); | |
pack_tag(HEADER_FOR_BLOCK(bp), size, 0); | |
pack_tag(FOOTER_FOR_BLOCK(bp), size, 0); | |
} | |
else if (!prev_alloc && next_alloc) | |
{ | |
// The previous block is free, but the next block is allocated. | |
// Remove the previous block from its free list. | |
remove_block_from_free_list(PREV_BLOCK(bp)); | |
// Combine the previous block with this block. | |
size += SIZE_FROM_TAG(HEADER_FOR_BLOCK(PREV_BLOCK(bp))); | |
pack_tag(FOOTER_FOR_BLOCK(bp), size, 0); | |
pack_tag(HEADER_FOR_BLOCK(PREV_BLOCK(bp)), size, 0); | |
bp = PREV_BLOCK(bp); | |
} | |
else | |
{ | |
// Both the previous block and the next block are free. | |
// Remove both of them from their free lists. | |
remove_block_from_free_list(NEXT_BLOCK(bp)); | |
remove_block_from_free_list(PREV_BLOCK(bp)); | |
// Combine all three blocks together into one large contiguous free | |
// block. | |
size += SIZE_FROM_TAG(HEADER_FOR_BLOCK(PREV_BLOCK(bp))) + SIZE_FROM_TAG(FOOTER_FOR_BLOCK(NEXT_BLOCK(bp))); | |
pack_tag(HEADER_FOR_BLOCK(PREV_BLOCK(bp)), size, 0); | |
pack_tag(FOOTER_FOR_BLOCK(NEXT_BLOCK(bp)), size, 0); | |
bp = PREV_BLOCK(bp); | |
} | |
if (VERBOSE) | |
{ | |
checkheap(VERBOSE); | |
} | |
// Since we may have moved the block pointer, be sure to return the | |
// (potentially) new one! | |
return bp; | |
} | |
/**** | |
* integrate | |
* | |
* Given a free block, ensures that the free block is properly inserted into the | |
* appropriate free list. | |
* | |
* When integrating a block into an existing list, we insert the new block into | |
* the head of the list and have it point back to the previous first element. | |
* | |
* The diagram below shows the change that happens during integration. We see | |
* the size class head on the far left, an already-integrated free block in the | |
* middle, and a new (unintegrated) free block on the right. Connections on the | |
* top side of each diagram are "next" pointers, and connections on the bottom | |
* are "previous" pointers. | |
* | |
* Note that the first node in the list will always have its "previous" pointer | |
* set back to the head of the list; this is to properly update the pointers if | |
* a free node is allocated. No node can be a part of a list and not have its | |
* "previous" pointer set. | |
* | |
* However, the last node in a list will not have a "next" pointer. | |
* | |
* Before integration: | |
* | |
* +-next->>>-----+ 0 | |
* | | | | |
* +-|-+--- ---+V-|+---+---+---+---+--- ---+---+---+---+---+--- | |
* | | ... | | ############# | ... | ############# | ... | |
* +-*-+--- ---+-|-+---+---+---+---+--- ---+---+---+---+---+--- | |
* | | | |
* +------<<<-prev-+ | |
* | |
* After integration: | |
* | |
* +------>>>------------next------------->>>-------+ | |
* | 0 +----------next-------<<<-----+ | | |
* | | | | | | |
* +-|-+--- ---+|-V+---+---+---+---+--- ---+|-V+---+---+---+--- | |
* | | ... | | ############# | ... | | ######### | ... | |
* +-*-+--- ---+-|-+---+---+---+---+--- ---+*-|+---+---+---+--- | |
* | | | | | |
* | +---------prev--------->>>-----+ | | |
* +------<<<-----------prev--------------<<<-------+ | |
*/ | |
static void | |
integrate(void* bp) | |
{ | |
void* old_node; | |
if (VERBOSE) | |
{ | |
printf("integrate: "); | |
print_address(bp); | |
printf("\n"); | |
} | |
size_t alloc = ALLOC_FROM_TAG(HEADER_FOR_BLOCK(bp)); | |
size_t size = SIZE_FROM_TAG(HEADER_FOR_BLOCK(bp)); | |
// Verify that we're dealing with a free block here. | |
if (alloc) | |
{ | |
// Apparently we're not. | |
return; | |
} | |
// Get the pointer to the list for the required class. | |
void* list_head = size_class_pointer_for_size(size); | |
if (VERBOSE) | |
{ | |
printf(" list_head: "); | |
print_address(list_head); | |
printf("\n"); | |
} | |
old_node = (void*)GET(list_head); | |
if (VERBOSE) | |
{ | |
printf(" old node: "); | |
print_address(old_node); | |
printf("\n"); | |
} | |
// Set the pointers in the new node. | |
SET_NEXT_LIST_ELEMENT(bp, old_node); | |
SET_PREV_LIST_ELEMENT(bp, list_head); | |
// Did the old node exist? | |
if (old_node) | |
{ | |
// Set the 'previous' pointer in the old node. | |
SET_PREV_LIST_ELEMENT(old_node, bp); | |
} | |
// Update the list head. | |
SET_NEXT_LIST_ELEMENT(list_head, bp); | |
if (VERBOSE) | |
{ | |
printf(" new node: "); | |
print_address(bp); | |
printf("\n"); | |
printf(" *list_head: "); | |
print_address((void*)GET(list_head)); | |
printf("\n"); | |
checkheap(VERBOSE); | |
} | |
} | |
/**** | |
* place | |
* | |
* Place a block of 'size' bytes at the start of the block pointed to by 'bp' | |
* and split it if the remainder would be at least the minimum block size. The | |
* block may be either allocated or unallocated when it comes in, but it will be | |
* allocated at the completion of this method. | |
*/ | |
static void | |
place( | |
void* bp, | |
size_t size ) | |
{ | |
if (VERBOSE) | |
{ | |
printf("place: "); | |
print_address(bp); | |
printf("\n"); | |
printf(" %zu bytes (%zu words)\n", size, size / WSIZE); | |
} | |
// Get some info. | |
size_t block_size = SIZE_FROM_TAG(HEADER_FOR_BLOCK(bp)); | |
size_t allocated = ALLOC_FROM_TAG(HEADER_FOR_BLOCK(bp)); | |
// Check whether this block is free or allocated. | |
if (!allocated) | |
{ | |
// It's a free block! Remove it from its free list before we do | |
// anything else to it. | |
remove_block_from_free_list(bp); | |
} | |
// Check if the current block can be split. | |
if ((block_size - size) >= MIN_BLOCK_SIZE * WSIZE) | |
{ | |
if (VERBOSE) | |
{ | |
printf(" block can be split\n"); | |
} | |
// Mark the header and footer to denote allocation and new size. | |
pack_tag(HEADER_FOR_BLOCK(bp), size, 1); | |
pack_tag(FOOTER_FOR_BLOCK(bp), size, 1); | |
// Now mark the next block as being unallocated - it was large enough to | |
// make it on its own. | |
bp = NEXT_BLOCK(bp); | |
pack_tag(HEADER_FOR_BLOCK(bp), block_size - size, 0); | |
pack_tag(FOOTER_FOR_BLOCK(bp), block_size - size, 0); | |
// Now integrate that new free block. | |
bp = coalesce_and_integrate(bp); | |
} | |
else | |
{ | |
if (VERBOSE) | |
{ | |
printf(" utilizing whole block\n"); | |
} | |
// There was not enough free space to split the block, so just allocate | |
// the whole thing. | |
pack_tag(HEADER_FOR_BLOCK(bp), block_size, 1); | |
pack_tag(FOOTER_FOR_BLOCK(bp), block_size, 1); | |
} | |
} | |
/**** | |
* remove_block_from_free_list | |
* | |
* As the name suggests, this method will remove a block from a free list (if it | |
* is a free block). | |
*/ | |
static void | |
remove_block_from_free_list(void* bp) | |
{ | |
// Ensure that the block is actually free. | |
if (ALLOC_FROM_TAG(HEADER_FOR_BLOCK(bp))) | |
{ | |
return; | |
} | |
if (VERBOSE) | |
{ | |
printf("remove_block_from_free_list: "); | |
print_address(bp); | |
printf("\n"); | |
} | |
void* prev_node = (void*)PREV_LIST_ELEMENT(bp); | |
void* next_node = (void*)NEXT_LIST_ELEMENT(bp); | |
if (VERBOSE) | |
{ | |
printf(" PREV_LIST_ELEMENT: "); | |
print_address(prev_node); | |
printf("\n"); | |
printf(" NEXT_LIST_ELEMENT: "); | |
print_address(next_node); | |
printf("\n"); | |
} | |
// Update the previous node (if it exists). | |
if (prev_node != 0) | |
{ | |
PUT(prev_node, (unsigned int)next_node); | |
} | |
// Update the next node (if it exists). | |
if (next_node != 0) | |
{ | |
PUT(next_node + WSIZE, (unsigned int)prev_node); | |
} | |
} | |
/**** | |
* find_fit | |
* | |
* Searches the appropriate size class free list to find a suitable block for | |
* the given size (which is assumed to be in bytes). | |
*/ | |
static void* | |
find_fit(size_t size) | |
{ | |
int size_class; | |
void* best_fit; | |
// Find the beginning of the designated list for the given size. | |
size_class = size_class_for_size(size); | |
best_fit = first_element_from_size_class(size_class); | |
if (VERBOSE) | |
{ | |
printf("find_fit: %zu bytes (%zu words)\n", size, size / WSIZE); | |
printf(" best fit size class: %d\n", size_class); | |
} | |
if (best_fit != 0) | |
{ | |
if (VERBOSE) | |
{ | |
printf(" best fit first node: "); | |
print_address(best_fit); | |
printf("\n"); | |
} | |
// The appropriate list is initialized, but we should search it for an | |
// element that actually fits. (Since some of the lists are range-based, | |
// it is possible to find elements too small for our new block.) | |
while (SIZE_FROM_TAG(HEADER_FOR_BLOCK(best_fit)) < size) | |
{ | |
if (VERBOSE) | |
{ | |
printf(" size too small: %u\n", SIZE_FROM_TAG(HEADER_FOR_BLOCK(best_fit))); | |
} | |
// Move to the next element of the list. | |
best_fit = (void*)NEXT_LIST_ELEMENT(best_fit); | |
if (VERBOSE) | |
{ | |
printf(" best fit next node: "); | |
print_address(best_fit); | |
printf("\n"); | |
} | |
if (best_fit == 0) | |
{ | |
// We got to the end of the best-fit free list. Lamentations. | |
if (VERBOSE) | |
{ | |
printf(" nothing valid in best fit list\n"); | |
} | |
// Increment the size class to move on with our lives. | |
++size_class; | |
break; | |
} | |
} | |
} | |
/******** | |
* I'm keeping this here for humility. | |
* | |
* So it turns out that these clever 'while' loops -- the kind where a | |
* value is assigned on each iteration -- can be really tricky if you later | |
* edit code that occurs before the loop. | |
* | |
* Prime example: This loop caused an issue where, after finding a best fit | |
* node element that is *not* the first element, the first element would be | |
* reassigned to the 'best_fit' variable here. | |
* | |
* Which means that problems happened. Weird, obnoxious, difficult-to-debug | |
* problems. | |
* | |
* Sigh. | |
* | |
* // Ensure that that list is initialized. If it's not, increment up the size | |
* // classes as needed. | |
* while ((best_fit = first_element_from_size_class(size_class)) == 0 && size_class < NUM_SIZE_CLASSES) | |
* { | |
* ++size_class; | |
* if (VERBOSE) | |
* { | |
* printf(" list uninitialized; incrementing to %d\n", size_class); | |
* } | |
* } | |
*/ | |
// At this point, we may have an empty pointer. If this is the case, then | |
// the best-fit size class had no nodes of the appropriate size. Iterate | |
// over all of the larger size class lists until we find a valid pointer. | |
while (best_fit == 0 && size_class < NUM_SIZE_CLASSES) | |
{ | |
// No luck. Jump to the next list! | |
best_fit = first_element_from_size_class(++size_class); | |
if (VERBOSE) | |
{ | |
printf(" list uninitialized; incrementing to %d\n", size_class); | |
} | |
} | |
// We might have just gotten to the last size class and it isn't | |
// initialized. | |
if (best_fit == 0) | |
{ | |
// Curses. | |
if (VERBOSE) | |
{ | |
printf(" no initialized size class lists\n"); | |
} | |
return NULL; | |
} | |
// We have a valid list. Hooray! So give the pointer to the first element | |
// of the list. | |
return best_fit; | |
} | |
/**** | |
* pack_tag | |
* | |
* Creates a tag and places it at a given pointer location. It is assumed that | |
* the pointer does *not* point to a block, but to a header or footer position. | |
* The low-order bit values are assumed to be given as booleans. | |
* | |
* 0 29 30 31 bit positions | |
* +-----------+---+---+---+ | |
* | sss...sss | u | p | a | | |
* +-----|-----+-|-+-|-+-|-+ | |
* | | | | | |
* | | | + current block allocation status bit | |
* | | + previous block allocation status bit | |
* | + unused bit | |
* + current block size | |
*/ | |
static void | |
pack_tag( | |
void* ptr, | |
size_t size, | |
int alloc ) | |
{ | |
// Ensure low-order bits are either 0 or 1 - nothing else. | |
alloc = !(!alloc); | |
// Pack the tag! | |
PUT(ptr, PACK(size, alloc)); | |
} | |
/**** | |
* size_class_for_size | |
* | |
* Converts a size (in bytes) into a size class based on the maximum number of | |
* size classes allowed by NUM_SIZE_CLASSES. | |
*/ | |
static int | |
size_class_for_size(size_t size) | |
{ | |
int size_class = 1; | |
size_t words = size / WSIZE; | |
if (VERBOSE) | |
{ | |
printf("size class for size: %zu bytes (%zu words)\n", size, words); | |
} | |
// Pick a size class based on the size given. | |
if (words <= 3) | |
{ | |
// The way things are, we can't have fewer than two payload words in a | |
// block. | |
size_class = 1; | |
} | |
else if (words <= 7) | |
{ | |
// If it's less than 8 words, then it has its own special size class. | |
size_class = words - 2; | |
} | |
else if (words < (1 << (NUM_SIZE_CLASSES - 3))) | |
{ | |
// From 8 words on, we segregate by powers of two. The largest size | |
// class is 2^(number of size classes - 3). Deducing the purpose of the | |
// 'minus three' is an exercise left to the reader. | |
while (words >>= 1) | |
{ | |
// Roll the size to the right, incrementing the size class number. | |
++size_class; | |
} | |
// To account for some arithmetic, we have to increment by five. | |
size_class += 2; | |
} | |
else | |
{ | |
// This is the largest size class available. | |
size_class = NUM_SIZE_CLASSES; | |
} | |
if (VERBOSE) | |
{ | |
printf(" size_class: %d\n", size_class); | |
} | |
// Found it! | |
return size_class; | |
} | |
/**** | |
* first_element_from_size_class | |
* | |
* Given a size class, returns the location of the first element of that list, | |
* or NULL if none exists. | |
*/ | |
static void* | |
first_element_from_size_class(int size_class) | |
{ | |
// Just cast the returned value from the size class header into a pointer. | |
void* first_element = (void*)GET(SIZE_CLASS_HEAD(size_class)); | |
if (first_element == 0) | |
{ | |
return NULL; | |
} | |
return first_element; | |
} | |
/**** | |
* size_class_pointer_for_size | |
* | |
* Determines the correct list head for a size class based on a given size in | |
* bytes. | |
*/ | |
static void* | |
size_class_pointer_for_size(size_t size) | |
{ | |
// Calculate the size_class value (the number of the size class). | |
int size_class = size_class_for_size(size); | |
// Now fetch and return the pointer to the first element for the size class. | |
return SIZE_CLASS_HEAD(size_class); | |
} | |
/******************************************************************************* | |
* Printout Methods | |
* | |
* These methods just print out information during runtime. They are | |
* automatically invoked if the preprocessor directives VERBOSE or VERIFY are | |
* modified. | |
* | |
* There is little documentation because these methods changed frequently and | |
* extensively throughout development. | |
*/ | |
int | |
mm_check(void) | |
{ | |
int errors = !checkheap(VERBOSE); | |
if (VERBOSE) | |
{ | |
printf("\n"); | |
} | |
return errors; | |
} | |
static void | |
print_address(void* ptr) | |
{ | |
printf("%p", ptr); | |
if (SHOW_SHORT_WORDS) | |
{ | |
printf(" (%d)", short_word_address(ptr)); | |
} | |
} | |
static int | |
short_address(void* ptr) | |
{ | |
return ptr - (void*)heap_listp; | |
} | |
static int | |
short_word_address(void* ptr) | |
{ | |
int adjusted = short_address(ptr) - (2 * WSIZE); | |
return adjusted / WSIZE; | |
} | |
static void | |
printblock(void* bp) | |
{ | |
size_t header_size; | |
size_t header_alloc; | |
size_t footer_size; | |
size_t footer_alloc; | |
header_size = SIZE_FROM_TAG(HEADER_FOR_BLOCK(bp)) / WSIZE; | |
header_alloc = ALLOC_FROM_TAG(HEADER_FOR_BLOCK(bp)); | |
footer_size = SIZE_FROM_TAG(FOOTER_FOR_BLOCK(bp)) / WSIZE; | |
footer_alloc = ALLOC_FROM_TAG(FOOTER_FOR_BLOCK(bp)); | |
print_address(bp); | |
printf("\n"); | |
printf(" [%c|%zu", (header_alloc ? 'a' : 'f'), header_size); | |
if (header_size != 0) | |
{ | |
printf("||%c|%zu", (footer_alloc ? 'a' : 'f'), footer_size); | |
} | |
printf("]\n"); | |
printf(" header: "); | |
print_address(HEADER_FOR_BLOCK(bp)); | |
printf("\n"); | |
if (header_size != 0) | |
{ | |
printf(" footer: "); | |
print_address(FOOTER_FOR_BLOCK(bp)); | |
printf("\n"); | |
} | |
if (NEXT_BLOCK(bp) != bp) | |
{ | |
printf(" next header: "); | |
print_address(HEADER_FOR_BLOCK(NEXT_BLOCK(bp))); | |
printf("\n"); | |
} | |
else | |
{ | |
printf(" next header: N/A\n"); | |
} | |
// Print other nodes if this is a free node in a list. | |
if (!header_alloc) | |
{ | |
printf(" next node: "); | |
print_address((void*)NEXT_LIST_ELEMENT(bp)); | |
printf("\n"); | |
printf(" prev node: "); | |
print_address((void*)PREV_LIST_ELEMENT(bp)); | |
printf("\n"); | |
} | |
} | |
static int | |
checkblock(void* bp) | |
{ | |
int error = 0; | |
// Check alignment. | |
if ((size_t)bp % 8) | |
{ | |
printf("Error: %p is not double-word aligned!\n", bp); | |
error = 1; | |
} | |
return error; | |
} | |
static int | |
checkheap(int verbose) | |
{ | |
int i; | |
char* bp = heap_listp; | |
int error = 0; | |
if (verbose) | |
{ | |
printf("Heap (%p): %lu + 16 bytes (%lu + 4 words)\n", heap_listp, mem_heapsize() - 16, (mem_heapsize() / WSIZE) - 4); | |
printf(" mem_heapsize: %zu bytes (%zu words)\n", mem_heapsize(), mem_heapsize() / WSIZE); | |
printf(" EPILOGUE_HEADER: "); | |
print_address(EPILOGUE_HEADER); | |
printf("\n"); | |
} | |
// Printing whether the size class pointers are initialized. | |
printf("Size class pointer initialization:\n"); | |
printf(" "); | |
for (i = 1; i <= NUM_SIZE_CLASSES; ++i) | |
{ | |
printf("%4d", i); | |
} | |
printf("\n"); | |
printf(" |"); | |
for (i = 1; i <= NUM_SIZE_CLASSES; ++i) | |
{ | |
printf(" %c |", (first_element_from_size_class(i) == 0) ? '_' : '#'); | |
} | |
printf("\n"); | |
for (i = 1; i <= NUM_SIZE_CLASSES; ++i) | |
{ | |
printf(" %4d. ", i); | |
print_address(first_element_from_size_class(i)); | |
printf(" || "); | |
print_address((void*)SIZE_CLASS_HEAD(i)); | |
printf("\n"); | |
} | |
if ((SIZE_FROM_TAG(HEADER_FOR_BLOCK(heap_listp)) != (2 * WSIZE)) || !(ALLOC_FROM_TAG(HEADER_FOR_BLOCK(heap_listp)))) | |
{ | |
printf("Bad prologue header.\n"); | |
error = 1; | |
} | |
error |= checkblock(heap_listp); | |
for (bp = heap_listp; SIZE_FROM_TAG(HEADER_FOR_BLOCK(bp)) > 0; bp = NEXT_BLOCK(bp)) | |
{ | |
if (verbose) | |
{ | |
printblock(bp); | |
} | |
error |= checkblock(bp); | |
} | |
if (verbose) | |
{ | |
printblock(bp); | |
} | |
if ((SIZE_FROM_TAG(HEADER_FOR_BLOCK(bp)) != 0) || !(ALLOC_FROM_TAG(HEADER_FOR_BLOCK(bp)))) | |
{ | |
printf("Bad epilogue header.\n"); | |
error = 1; | |
} | |
return error; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment