@enfiskutensykkel
Created May 24, 2019 16:17
Simple program for measuring GPU bandwidth
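//
// Build with nvcc (file and binary names below are assumptions, not part of the gist):
//
//   nvcc -O2 -o gpu-bandwidth gpu-bandwidth.cu
//
// Options (see parseArguments below):
//   -c <count>   copies per timed batch
//   -r <repeat>  repetitions per transfer size
//   -a           run host-to-device and device-to-host tests
//   -d           run device-to-device tests
//   -m           disable P2P (device-to-device copies go through UVA/host memory)
//   -g <gpu>     restrict the test to the given GPU (may be repeated)
//   -h           print usage
//
// At least one of -a or -d must be given for any transfers to be measured.
//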
#include <cuda.h>
#include <stdexcept>
#include <string>
#include <vector>
#include <memory>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <unistd.h>

using error = std::runtime_error;
using std::string;
using gpulist = std::vector<int>;
using gpuptr = std::shared_ptr<void>;

// RAII wrapper around a CUDA event: the constructor creates and records the
// event, and subtracting two events yields the elapsed time in microseconds.
struct event
{
    cudaEvent_t evt;

    event()
    {
        auto err = cudaEventCreate(&evt);
        if (err != cudaSuccess)
        {
            throw error("Could not create event: " + string(cudaGetErrorString(err)));
        }

        err = cudaEventRecord(evt);
        if (err != cudaSuccess)
        {
            throw error("Could not record event: " + string(cudaGetErrorString(err)));
        }
    }

    ~event()
    {
        cudaEventDestroy(evt);
    }

    // Elapsed time from the other (earlier) event to this one, in microseconds.
    double operator-(const event& other) const
    {
        float ms = 0;
        auto err = cudaEventElapsedTime(&ms, other.evt, evt);
        if (err != cudaSuccess)
        {
            throw error("Could not get elapsed time: " + string(cudaGetErrorString(err)));
        }

        return ((double) ms) * 1000.0;
    }
};
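
// Timing pattern used below: construct one event right before a batch of copies
// and another right after, synchronize the device, and divide the elapsed
// microseconds by the batch size to get the average time per copy.
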
__host__ static void setDevice(int device)
{
    auto err = cudaSetDevice(device);
    if (err != cudaSuccess)
    {
        throw error("Could not set device: " + string(cudaGetErrorString(err)));
    }
}

// Allocate either device memory on the given GPU or mapped (pinned) host memory,
// wrapped in a shared_ptr that calls the matching deallocator.
__host__ static gpuptr createBuffer(int device, size_t size, bool gpu)
{
    cudaError_t err;
    void* buffer = nullptr;

    if (gpu)
    {
        setDevice(device);

        err = cudaMalloc(&buffer, size);
        if (err != cudaSuccess)
        {
            throw error("Could not allocate device memory: " + string(cudaGetErrorString(err)));
        }

        return gpuptr(buffer, cudaFree);
    }
    else
    {
        err = cudaHostAlloc(&buffer, size, cudaHostAllocMapped);
        if (err != cudaSuccess)
        {
            throw error("Could not allocate mapped host memory: " + string(cudaGetErrorString(err)));
        }

        return gpuptr(buffer, cudaFreeHost);
    }
}

// Time host<->device copies for transfer sizes from 1 KiB up to the full buffer,
// doubling each step. Each measurement copies 'count' times and is repeated
// 'repeat' times; results are printed as microseconds per copy and bytes per
// microsecond (equivalently MB/s).
__host__ static void runHost(int dev, gpuptr gpubuf, gpuptr hostbuf, size_t size, size_t count, size_t repeat, cudaMemcpyKind kind)
{
    const size_t start = 1024;
    const size_t end = size;

    for (size_t size = start; size <= end; size <<= 1)
    {
        for (size_t i = 0; i < repeat; ++i)
        {
            setDevice(dev);

            // cudaMemcpy takes the destination pointer first; pick direction from kind
            auto dst = hostbuf;
            auto src = gpubuf;
            if (kind == cudaMemcpyHostToDevice)
            {
                dst = gpubuf;
                src = hostbuf;
            }

            event before;
            for (size_t j = 0; j < count; ++j)
            {
                auto err = cudaMemcpy(dst.get(), src.get(), size, kind);
                if (err != cudaSuccess)
                {
                    throw error("Could not copy memory: " + string(cudaGetErrorString(err)));
                }
            }
            event after;

            auto err = cudaDeviceSynchronize();
            if (err != cudaSuccess)
            {
                throw error(cudaGetErrorString(err));
            }

            auto time = (after - before) / count;
            auto bw = ((double) size) / time;

            if (kind == cudaMemcpyHostToDevice)
            {
                fprintf(stdout, "host %4d HtoD %10zu %12.5f %12.3f\n", dev, size, time, bw);
            }
            else
            {
                fprintf(stdout, "%4d host DtoH %10zu %12.5f %12.3f\n", dev, size, time, bw);
            }
        }
    }
}

// Dummy kernel that keeps the GPU briefly busy so the host can enqueue the timed
// copies before the GPU starts executing them, keeping launch overhead out of the
// measurement.
__global__ static void dostuff(int* ptr)
{
    float j = threadIdx.x;

    for (int i = 1; i < 10000; ++i)
    {
        j = (j + 1) / j;
    }

    // The comparison is effectively never true; the conditional store only exists
    // so the compiler cannot discard the loop.
    if (threadIdx.x == j)
    {
        ptr[0] = j;
    }
}

// Time device-to-device copies from srcbuf (on srcdev) to dstbuf (on dstdev),
// either as explicit peer-to-peer copies or as plain UVA device-to-device copies.
__host__ static void runP2P(bool p2p, int srcdev, gpuptr srcbuf, int dstdev, gpuptr dstbuf, size_t size, size_t count, size_t repeat)
{
    cudaError_t err;
    const size_t start = 1024;
    const size_t end = size;

    for (size_t size = start; size <= end; size <<= 1)
    {
        for (size_t i = 0; i < repeat; ++i)
        {
            setDevice(srcdev);

            // Keep the source GPU busy while the copies are enqueued
            dostuff<<<1, 1>>>((int*) srcbuf.get());

            event before;
            for (size_t j = 0; j < count; ++j)
            {
                // Both copy functions take the destination first
                if (p2p)
                {
                    err = cudaMemcpyPeerAsync(dstbuf.get(), dstdev, srcbuf.get(), srcdev, size);
                }
                else
                {
                    err = cudaMemcpyAsync(dstbuf.get(), srcbuf.get(), size, cudaMemcpyDeviceToDevice);
                }

                if (err != cudaSuccess)
                {
                    throw error("Could not copy memory: " + string(cudaGetErrorString(err)));
                }
            }
            event after;

            err = cudaDeviceSynchronize();
            if (err != cudaSuccess)
            {
                throw error(cudaGetErrorString(err));
            }

            auto time = (after - before) / count;
            auto bw = ((double) size) / time;

            fprintf(stdout, "%4d %4d DtoD %10zu %12.5f %12.3f\n", srcdev, dstdev, size, time, bw);
        }
    }
}

// Allocate 128 MiB buffers on both GPUs plus a mapped host buffer, then run the
// requested transfer tests.
__host__ static void bandwidthTest(bool p2p, bool dtoh, bool htod, bool dtod, int ping, int pong, size_t count, size_t repeat)
{
    const size_t size = 128ULL << 20;

    gpuptr src = createBuffer(ping, size, true);
    gpuptr dst = createBuffer(pong, size, true);
    gpuptr host = createBuffer(-1, size, false);

    fprintf(stdout, "%4s %4s %4s %10s %12s %12s\n", "src", "dst", "mode", "size", "usecs", "bandwidth");

    if (dtoh)
    {
        runHost(ping, src, host, size, count, repeat, cudaMemcpyDeviceToHost);
        runHost(pong, dst, host, size, count, repeat, cudaMemcpyDeviceToHost);
    }

    if (htod)
    {
        runHost(ping, src, host, size, count, repeat, cudaMemcpyHostToDevice);
        runHost(pong, dst, host, size, count, repeat, cudaMemcpyHostToDevice);
    }

    if (dtod)
    {
        runP2P(p2p, ping, src, pong, dst, size, count, repeat);
        runP2P(p2p, pong, dst, ping, src, size, count, repeat);
    }
}

static void enableP2P(int src, int dst)
{
    cudaError_t err;

    err = cudaSetDevice(src);
    if (err != cudaSuccess)
    {
        throw error(string("Could not set GPU: ") + cudaGetErrorString(err));
    }

    err = cudaDeviceEnablePeerAccess(dst, 0);
    if (err != cudaSuccess)
    {
        throw error(string("Failed to enable P2P for GPU: ") + cudaGetErrorString(err));
    }
}

// Format a PCI BDF string (domain:bus:device.function) for a GPU.
static string formatBdf(int domain, int bus, int device)
{
    char buffer[64];
    snprintf(buffer, sizeof(buffer), "%04x:%02x:%02x.0", domain, bus, device);
    return string(buffer);
}

static string getGpuName(int device)
{
    auto err = cudaSetDevice(device);
    if (err != cudaSuccess)
    {
        throw error(string("Could not set GPU: ") + cudaGetErrorString(err));
    }

    cudaDeviceProp prop;
    err = cudaGetDeviceProperties(&prop, device);
    if (err != cudaSuccess)
    {
        throw error(string("Could not get GPU properties: ") + cudaGetErrorString(err));
    }

    return string(prop.name) + " " + formatBdf(prop.pciDomainID, prop.pciBusID, prop.pciDeviceID);
}

// A GPU pair is usable if the devices are distinct and, when P2P is requested,
// peer access is possible in both directions.
static bool checkP2P(int i, int j, bool use_p2p)
{
    if (i == j)
    {
        return false;
    }

    if (!use_p2p)
    {
        return true;
    }

    int p2p = 0;

    auto err = cudaDeviceCanAccessPeer(&p2p, i, j);
    if (err != cudaSuccess)
    {
        throw error(string("Failed to check for P2P access: ") + cudaGetErrorString(err));
    }

    if (p2p != 1)
    {
        return false;
    }

    err = cudaDeviceCanAccessPeer(&p2p, j, i);
    if (err != cudaSuccess)
    {
        throw error(string("Failed to check for P2P access: ") + cudaGetErrorString(err));
    }

    if (p2p != 1)
    {
        return false;
    }

    return true;
}

// Pick a pair of GPUs (ping, pong) to test. If GPUs were specified with -g,
// search among those (or pair a single specified GPU with any other GPU in the
// system); otherwise consider all GPUs.
static void findGpus(gpulist& gpus, bool p2p, int& ping, int& pong)
{
    cudaError_t err;
    int numGpus = 0;

    err = cudaGetDeviceCount(&numGpus);
    if (err != cudaSuccess)
    {
        throw error(string("Failed to get GPU count: ") + cudaGetErrorString(err));
    }

    if (gpus.empty())
    {
        for (int i = 0; i < numGpus; ++i)
        {
            gpus.push_back(i);
        }
    }

    for (int i: gpus)
    {
        if (gpus.size() > 1)
        {
            for (int j: gpus)
            {
                if (checkP2P(i, j, p2p))
                {
                    ping = i;
                    pong = j;
                    return;
                }
            }
        }
        else
        {
            for (int j = 0; j < numGpus; ++j)
            {
                if (checkP2P(i, j, p2p))
                {
                    ping = i;
                    pong = j;
                    return;
                }
            }
        }
    }

    throw error("Could not find suitable GPUs, try the -m option to use host memory instead");
}

static string usageString(const char* name)
{
    return string("Usage: ") + name + string(" [-c <count>] [-r <repeat>] [-m] [-d] [-a] [-g <gpu>]...");
}

// Parse command-line options:
//   -c <count>   number of copies per timed batch
//   -r/-n <num>  number of repetitions per transfer size
//   -m           disable P2P; device-to-device copies go through UVA/host memory
//   -d           run device-to-device tests
//   -a           run host-to-device and device-to-host tests
//   -g <gpu>     restrict the test to the given GPU (may be repeated)
static void parseArguments(int argc, char** argv, uint32_t& repeat, uint32_t& count, bool& p2p, bool& dtod, bool& all, gpulist& gpus)
{
    char* str;
    int opt;
    int gpu = -1;
    int numGpus = 0;

    auto err = cudaGetDeviceCount(&numGpus);
    if (err != cudaSuccess)
    {
        throw error(string("Failed to get GPU count: ") + cudaGetErrorString(err));
    }

    while ((opt = getopt(argc, argv, ":c:r:n:hg:mda")) != -1)
    {
        switch (opt)
        {
            case 'h':
                throw error(usageString(argv[0]));

            case ':':
                throw error(string("Missing value for option: ") + argv[optind-1]);

            case '?':
                throw error(string("Unknown option: ") + argv[optind-1]);

            case 'c':
                str = nullptr;
                count = strtoul(optarg, &str, 0);
                if (str == nullptr || *str != '\0' || count == 0)
                {
                    throw error("Invalid count");
                }
                break;

            case 'r':
            case 'n':
                str = nullptr;
                repeat = strtoul(optarg, &str, 0);
                if (str == nullptr || *str != '\0' || repeat == 0)
                {
                    throw error("Invalid repeat count");
                }
                break;

            case 'm':
                p2p = false;
                break;

            case 'a':
                all = true;
                break;

            case 'd':
                dtod = true;
                break;

            case 'g':
                str = nullptr;
                gpu = strtol(optarg, &str, 10);
                if (str == nullptr || *str != '\0' || gpu < 0 || gpu >= numGpus)
                {
                    throw error("Invalid GPU");
                }
                gpus.push_back(gpu);
                break;
        }
    }
}

int main(int argc, char** argv)
{
    uint32_t repeat = 10;
    uint32_t count = 1;
    bool p2p = true;
    int ping = -1;
    int pong = -1;
    bool all = false;
    bool dtod = false;

    try
    {
        gpulist gpus;
        parseArguments(argc, argv, repeat, count, p2p, dtod, all, gpus);
        findGpus(gpus, p2p, ping, pong);

        fprintf(stderr, "PING %d %s\n", ping, getGpuName(ping).c_str());
        if (p2p)
        {
            enableP2P(ping, pong);
        }

        fprintf(stderr, "PONG %d %s\n", pong, getGpuName(pong).c_str());
        if (p2p)
        {
            enableP2P(pong, ping);
        }

        fprintf(stdout, "count=%u\nrepeat=%u\n", count, repeat);
        fprintf(stdout, "uva=%s\n", p2p ? "false" : "true");
        fprintf(stdout, "p2p=%s\n", p2p ? "true" : "false");
        fprintf(stdout, "ping=%d %s\n", ping, getGpuName(ping).c_str());
        fprintf(stdout, "pong=%d %s\n", pong, getGpuName(pong).c_str());

        cudaDeviceProp props;
        cudaGetDeviceProperties(&props, ping);
        fprintf(stdout, "memory_rate=%d kHz\n", props.memoryClockRate);
        fprintf(stdout, "clock_rate=%d kHz\n", props.clockRate);
        fprintf(stdout, "\n");

        bandwidthTest(p2p, all, all, dtod, ping, pong, count, repeat);
        fprintf(stdout, "\n");
    }
    catch (const error& err)
    {
        fprintf(stderr, "%s\n", err.what());
        return 1;
    }

    return 0;
}
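
// Example invocation (binary name is an assumption): measure host<->device and
// device<->device bandwidth between GPU 0 and GPU 1, copying 10 times per
// measurement (assuming the two GPUs can reach each other with P2P; add -m otherwise):
//
//   ./gpu-bandwidth -a -d -c 10 -g 0 -g 1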