Last active
January 16, 2023 17:33
-
-
Save tomonari-masada/5d68b5002560d309b057 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <stdint.h> | |
#include <time.h> | |
#include <math.h> | |
#include <omp.h> | |
#define real float | |
#define BUFFSIZE 10000 | |
#define TOKENLEN 1000 | |
#define HASH_SIZE 131073 | |
#define NUM_OF_THREADS 16 | |
#define BORDER "<S>" | |
#define LAYER_NUM 3 | |
#define VEC_DIM 100 | |
#define SGM_THRESHOLD 15.0 | |
#define TANH_THRESHOLD 8.0 | |
#define TABLE_SIZE 50001 | |
#define LEARNING_RATE 0.02 | |
unsigned int nNumOfWords; | |
char **sWords; | |
unsigned int nBorder; | |
real dLearningRate; | |
unsigned long long nNumOfDocs; | |
unsigned int *nDocLen; | |
unsigned int **nWordSeq; | |
unsigned int nMaxDocLen; | |
unsigned int nIteration; | |
unsigned long long nNumOfSamples; | |
real dTotalLength; | |
real dDiffTime; | |
real dSgmTableFactor; | |
real *dSgmTable; | |
real dTanhTableFactor; | |
real *dTanhTable; | |
FILE *fOut; | |
unsigned int nLayerNum; | |
unsigned int *nVecDim; | |
unsigned int nLen; | |
unsigned int *nWSizeI; | |
unsigned int *nWSizeP; | |
unsigned int *nWSizeE; | |
unsigned int *nWSizeO; | |
real **dI; | |
real **dP; | |
real **dE; | |
real **dO; | |
real **dH; | |
real **dAI; | |
real **dAP; | |
real **dAE; | |
real **dAO; | |
real *dY; | |
real *dYHat; | |
real ***dWI; | |
real ***dWP; | |
real ***dWE; | |
real ***dWO; | |
real **dWOut; | |
real *dBiasOut; | |
real **dGH; | |
real **dGE; | |
real **dGAI; | |
real **dGAP; | |
real **dGAE; | |
real **dGAO; | |
real *dGYHat; | |
real ***dGWI; | |
real ***dGWP; | |
real ***dGWE; | |
real ***dGWO; | |
real **dGWOut; | |
real *dGBiasOut; | |
//---------------------------------------------------------------- | |
time_t start_time; | |
void put_time(FILE *f) | |
{ | |
time_t current_time; | |
time(¤t_time); | |
dDiffTime = difftime(current_time, start_time); | |
fprintf(f, "%.0lf", dDiffTime); | |
fflush(f); | |
} | |
real randn() | |
{ return ((real) (((double) rand()) / RAND_MAX)) * 2.0 - 1.0; } | |
//---------------------------------------------------------------- | |
// hash function by Paul Hsieh | |
// http://www.azillionmonkeys.com/qed/hash.html | |
#undef get16bits | |
#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ | |
|| defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) | |
#define get16bits(d) (*((const uint16_t *) (d))) | |
#endif | |
#if !defined (get16bits) | |
#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ | |
+(uint32_t)(((const uint8_t *)(d))[0]) ) | |
#endif | |
unsigned int SuperFastHash (const char * data, int len) { | |
uint32_t hash = len, tmp; int rem; | |
if (len <= 0 || data == NULL) return 0; | |
rem = len & 3; len >>= 2; | |
for (;len > 0; len--) { | |
hash += get16bits (data); | |
tmp = (get16bits (data+2) << 11) ^ hash; | |
hash = (hash << 16) ^ tmp; | |
data += 2*sizeof (uint16_t); | |
hash += hash >> 11; | |
} | |
switch (rem) { | |
case 3: hash += get16bits (data); | |
hash ^= hash << 16; | |
hash ^= ((signed char)data[sizeof (uint16_t)]) << 18; | |
hash += hash >> 11; | |
break; | |
case 2: hash += get16bits (data); | |
hash ^= hash << 11; hash += hash >> 17; | |
break; | |
case 1: hash += (signed char)*data; | |
hash ^= hash << 10; hash += hash >> 1; | |
} | |
hash ^= hash << 3; hash += hash >> 5; hash ^= hash << 4; | |
hash += hash >> 17; hash ^= hash << 25; hash += hash >> 6; | |
return ((unsigned int) hash) % HASH_SIZE; // by me | |
} | |
//---------------------------------------------------------------- | |
// hash table for words | |
unsigned int *nWordNums; | |
unsigned int **nWordIDs; | |
int initHash() | |
{ | |
unsigned int n; | |
nNumOfWords = 0; | |
nWordNums = (unsigned int *) malloc(sizeof(unsigned int) * HASH_SIZE); | |
nWordIDs = (unsigned int **) malloc(sizeof(unsigned int *) * HASH_SIZE); | |
if ((nWordNums == NULL) || (nWordIDs == NULL)) return 1; | |
for (n = 0; n < HASH_SIZE; n ++) nWordNums[n] = 0; | |
return 0; | |
} | |
unsigned int getWordID(const char *s) | |
{ | |
unsigned int n, nID, nHashKey; | |
nID = 0; | |
nHashKey = SuperFastHash(s, 100); | |
for (n = 0; n < nWordNums[nHashKey]; n ++) | |
if (strcmp(s, sWords[nWordIDs[nHashKey][n] - 1]) == 0) { | |
nID = nWordIDs[nHashKey][n]; | |
break; | |
} | |
return nID; | |
} | |
unsigned int addWord(const char *s) | |
{ | |
unsigned int n, nID, nHashKey; | |
nID = 0; | |
nHashKey = SuperFastHash(s, 100); | |
for (n = 0; n < nWordNums[nHashKey]; n ++) | |
if (strcmp(s, sWords[nWordIDs[nHashKey][n] - 1]) == 0) { | |
nID = nWordIDs[nHashKey][n]; | |
break; | |
} | |
if (nID == 0) { | |
nNumOfWords ++; | |
nWordNums[nHashKey] ++; | |
if (nWordNums[nHashKey] == 1) | |
nWordIDs[nHashKey] = (unsigned int *) malloc(sizeof(unsigned int)); | |
else | |
nWordIDs[nHashKey] | |
= (unsigned int *) realloc(nWordIDs[nHashKey], | |
sizeof(unsigned int) * nWordNums[nHashKey]); | |
nWordIDs[nHashKey][nWordNums[nHashKey] - 1] = nNumOfWords; | |
if (nNumOfWords == 1) | |
sWords = (char **) malloc(sizeof(char *)); | |
else | |
sWords = (char **) realloc(sWords, sizeof(char *) * nNumOfWords); | |
sWords[nNumOfWords - 1] = (char *) malloc(sizeof(char) * (strlen(s) + 1)); | |
memset(sWords[nNumOfWords - 1], 0, strlen(s) + 1); | |
strcpy(sWords[nNumOfWords - 1], s); | |
nID = nNumOfWords; | |
} | |
return nID; | |
} | |
//---------------------------------------------------------------- | |
real sgm(real dA) | |
{ | |
if (dA <= - SGM_THRESHOLD) return 0.0; | |
else if (dA >= SGM_THRESHOLD) return 1.0; | |
else return dSgmTable[((int) ((SGM_THRESHOLD + dA) * dSgmTableFactor))]; | |
} | |
real _tanh(real dA) | |
{ | |
if (dA <= - TANH_THRESHOLD) return - 1.0; | |
else if (dA >= TANH_THRESHOLD) return 1.0; | |
else return dTanhTable[((int) ((TANH_THRESHOLD + dA) * dTanhTableFactor))]; | |
} | |
void initParam() | |
{ | |
unsigned int nCK, nCN, nCC, nC; | |
for (nCN = 0; nCN < nLayerNum; nCN ++) | |
for (nCC = 0; nCC < nNumOfWords * nVecDim[nCN]; nCC ++) | |
dWOut[nCN][nCC] = 0.5 * randn(); | |
for (nCK = 0; nCK < nNumOfWords; nCK ++) | |
dBiasOut[nCK] = 0.5 * randn(); | |
for (nCN = 0; nCN < nLayerNum; nCN ++) | |
for (nCC = 0; nCC < nVecDim[nCN]; nCC ++) { | |
for (nC = 0; nC < nWSizeI[nCN]; nC ++) | |
dWI[nCN][nCC][nC] = 0.5 * randn(); | |
for (nC = 0; nC < nWSizeP[nCN]; nC ++) | |
dWP[nCN][nCC][nC] = 0.5 * randn(); | |
dWP[nCN][nCC][nWSizeP[nCN] - 1] = 1.0e10; | |
for (nC = 0; nC < nWSizeE[nCN]; nC ++) | |
dWE[nCN][nCC][nC] = 0.5 * randn(); | |
for (nC = 0; nC < nWSizeO[nCN]; nC ++) | |
dWO[nCN][nCC][nC] = 0.5 * randn(); | |
} | |
return; | |
} | |
void printParam() | |
{ | |
unsigned int nCK, nCN, nCC, nC; | |
for (nCK = 0; nCK < nNumOfWords; nCK ++) | |
fprintf(fOut, "#w %d %s\n", nCK, sWords[nCK]); | |
for (nCN = 0; nCN < nLayerNum; nCN ++) | |
for (nCK = 0; nCK < nNumOfWords; nCK ++) { | |
fprintf(fOut, "#W %d %s [", nCN, sWords[nCK]); | |
for (nCC = 0; nCC < nVecDim[nCN]; nCC ++) { | |
fprintf(fOut, "%f", dWOut[nCN][nCK * nVecDim[nCN] + nCC]); | |
if (nCC < nVecDim[nCN] - 1) fprintf(fOut, ","); | |
} | |
fprintf(fOut, "]\n"); | |
} | |
for (nCK = 0; nCK < nNumOfWords; nCK ++) | |
fprintf(fOut, "#B %s %f\n", sWords[nCK], dBiasOut[nCK]); | |
for (nCN = 0; nCN < nLayerNum; nCN ++) { | |
for (nC = 0; nC < nWSizeI[nCN]; nC ++) { | |
fprintf(fOut, "#I %d %d [", nCN, nC); | |
for (nCC = 0; nCC < nVecDim[nCN]; nCC ++) { | |
fprintf(fOut, "%f", dWI[nCN][nCC][nC]); | |
if (nCC < nVecDim[nCN] - 1) fprintf(fOut, ","); | |
} | |
fprintf(fOut, "]\n"); | |
} | |
for (nC = 0; nC < nWSizeP[nCN]; nC ++) { | |
fprintf(fOut, "#P %d %d [", nCN, nC); | |
for (nCC = 0; nCC < nVecDim[nCN]; nCC ++) { | |
fprintf(fOut, "%f", dWP[nCN][nCC][nC]); | |
if (nCC < nVecDim[nCN] - 1) fprintf(fOut, ","); | |
} | |
fprintf(fOut, "]\n"); | |
} | |
for (nC = 0; nC < nWSizeE[nCN]; nC ++) { | |
fprintf(fOut, "#E %d %d [", nCN, nC); | |
for (nCC = 0; nCC < nVecDim[nCN]; nCC ++) { | |
fprintf(fOut, "%f", dWE[nCN][nCC][nC]); | |
if (nCC < nVecDim[nCN] - 1) fprintf(fOut, ","); | |
} | |
fprintf(fOut, "]\n"); | |
} | |
for (nC = 0; nC < nWSizeO[nCN]; nC ++) { | |
fprintf(fOut, "#O %d %d [", nCN, nC); | |
for (nCC = 0; nCC < nVecDim[nCN]; nCC ++) { | |
fprintf(fOut, "%f", dWO[nCN][nCC][nC]); | |
if (nCC < nVecDim[nCN] - 1) fprintf(fOut, ","); | |
} | |
fprintf(fOut, "]\n"); | |
} | |
} | |
return; | |
} | |
real forward(unsigned long long nCD) | |
{ | |
real dLogLH; | |
unsigned int nLen; | |
unsigned int nCT, nCK, nCN, nCC, nTemp; | |
real dTemp; | |
dLogLH = 0.0; | |
nLen = nDocLen[nCD] + 1; | |
for (nCT = 0; nCT < nLen; nCT ++){ | |
if (nCT > 0) nCK = nWordSeq[nCD][nCT - 1]; | |
else nCK = nBorder; | |
for (nCN = 0; nCN < nLayerNum; nCN ++) { | |
#pragma omp parallel | |
{ | |
unsigned nCC, nC; | |
unsigned int nTempW, nTemp; | |
real dTempI, dTempP, dTempE, dTempO, dTemp; | |
#ifdef _OPENMP | |
unsigned int nThreadID = omp_get_thread_num(); | |
unsigned int nNumOfThreads = omp_get_num_threads(); | |
#else | |
unsigned int nThreadID = 0; | |
unsigned int nNumOfThreads = 1; | |
#endif | |
for (nCC = nThreadID; nCC < nVecDim[nCN]; nCC += nNumOfThreads) { | |
dTempI = dWI[nCN][nCC][nCK]; | |
dTempP = dWP[nCN][nCC][nCK]; | |
dTempE = dWE[nCN][nCC][nCK]; | |
dTempO = dWO[nCN][nCC][nCK]; | |
nTempW = nNumOfWords; | |
if (nCN > 0) { | |
for (nC = 0; nC < nVecDim[nCN - 1]; nC ++) { | |
dTemp = dH[nCN - 1][nVecDim[nCN - 1] * nCT + nC]; | |
dTempI += dWI[nCN][nCC][nTempW + nC] * dTemp; | |
dTempP += dWP[nCN][nCC][nTempW + nC] * dTemp; | |
dTempE += dWE[nCN][nCC][nTempW + nC] * dTemp; | |
dTempO += dWO[nCN][nCC][nTempW + nC] * dTemp; | |
} | |
nTempW += nVecDim[nCN - 1]; | |
} | |
if (nCT > 0) | |
for (nC = 0; nC < nVecDim[nCN]; nC ++) { | |
dTemp = dH[nCN][nVecDim[nCN] * (nCT - 1) + nC]; | |
dTempI += dWI[nCN][nCC][nTempW + nC] * dTemp; | |
dTempP += dWP[nCN][nCC][nTempW + nC] * dTemp; | |
dTempE += dWE[nCN][nCC][nTempW + nC] * dTemp; | |
dTempO += dWO[nCN][nCC][nTempW + nC] * dTemp; | |
} | |
nTempW += nVecDim[nCN]; | |
if (nCT > 0) { | |
dTemp = dE[nCN][nVecDim[nCN] * (nCT - 1) + nCC]; | |
dTempI += dWI[nCN][nCC][nTempW] * dTemp; | |
dTempP += dWP[nCN][nCC][nTempW] * dTemp; | |
} | |
dTempI += dWI[nCN][nCC][nTempW + 1]; | |
dTempP += dWP[nCN][nCC][nTempW + 1]; | |
dTempE += dWE[nCN][nCC][nTempW]; | |
dTempO += dWO[nCN][nCC][nTempW + 1]; | |
nTemp = nVecDim[nCN] * nCT + nCC; | |
dAI[nCN][nTemp] = dTempI; | |
dI[nCN][nTemp] = sgm(dTempI); | |
dAP[nCN][nTemp] = dTempP; | |
dP[nCN][nTemp] = sgm(dTempP); | |
dAE[nCN][nTemp] = dTempE; | |
dE[nCN][nTemp] = dI[nCN][nTemp] * _tanh(dAE[nCN][nTemp]); | |
if (nCT > 0) | |
dE[nCN][nTemp] += dP[nCN][nTemp] * dE[nCN][nVecDim[nCN] * (nCT - 1) + nCC]; | |
dTempO += dWO[nCN][nCC][nTempW] * dE[nCN][nTemp]; | |
dAO[nCN][nTemp] = dTempO; | |
dO[nCN][nTemp] = sgm(dTempO); | |
dH[nCN][nTemp] = dO[nCN][nTemp] * _tanh(dE[nCN][nTemp]); | |
} | |
} | |
} | |
nTemp = nCT * nNumOfWords; | |
for (nCK = 0; nCK < nNumOfWords; nCK ++) { | |
dTemp = dBiasOut[nCK]; | |
for (nCN = 0; nCN < nLayerNum; nCN ++) | |
for (nCC = 0; nCC < nVecDim[nCN]; nCC ++) | |
dTemp += dWOut[nCN][nVecDim[nCN] * nCK + nCC] | |
* dH[nCN][nVecDim[nCN] * nCT + nCC]; | |
dYHat[nTemp + nCK] = dTemp; | |
} | |
dTemp = dYHat[nTemp]; | |
for (nCK = 1; nCK < nNumOfWords; nCK ++) | |
if (dTemp < dYHat[nTemp + nCK]) dTemp = dYHat[nTemp + nCK]; | |
for (nCK = 0; nCK < nNumOfWords; nCK ++) dY[nTemp + nCK] = dYHat[nTemp + nCK] - dTemp; | |
for (nCK = 0; nCK < nNumOfWords; nCK ++) dY[nTemp + nCK] = exp(dY[nTemp + nCK]); | |
dTemp = 0.0; | |
for (nCK = 0; nCK < nNumOfWords; nCK ++) dTemp += dY[nTemp + nCK]; | |
for (nCK = 0; nCK < nNumOfWords; nCK ++) dY[nTemp + nCK] /= dTemp; | |
if (nCT == nLen - 1) dLogLH += log(dY[nTemp + nBorder]); | |
else dLogLH += log(dY[nTemp + nWordSeq[nCD][nCT]]); | |
} | |
return dLogLH; | |
} | |
void backward(unsigned long long nCD) | |
{ | |
unsigned int nLen; | |
unsigned int nCT, nCK, nCN, nCC; | |
unsigned int nTemp; | |
nLen = nDocLen[nCD] + 1; | |
nCT = nLen; | |
do { | |
nCT --; | |
nTemp = nCT * nNumOfWords; | |
for (nCK = 0; nCK < nNumOfWords; nCK ++) | |
dGYHat[nTemp + nCK] = - dY[nTemp + nCK]; | |
if (nCT < nLen - 1) dGYHat[nTemp + nWordSeq[nCD][nCT]] += 1.0; | |
else dGYHat[nTemp + nBorder] += 1.0; | |
} while (nCT > 0); | |
for (nCK = 0; nCK < nNumOfWords; nCK ++) | |
dGBiasOut[nCK] = 0.0; | |
for (nCN = 0; nCN < nLayerNum; nCN ++) | |
for (nCC = 0; nCC < nNumOfWords * nVecDim[nCN]; nCC ++) | |
dGWOut[nCN][nCC] = 0.0; | |
nCT = nLen; | |
do { | |
nCT --; | |
nTemp = nCT * nNumOfWords; | |
for (nCK = 0; nCK < nNumOfWords; nCK ++) | |
dGBiasOut[nCK] += dGYHat[nTemp + nCK]; | |
nCN = nLayerNum; | |
do { | |
nCN --; | |
#pragma omp parallel | |
{ | |
unsigned int nCK, nCNN, nCC, nC; | |
unsigned int nTemp, nTemp2, nTemp3; | |
real dTemp, dTemp2; | |
#ifdef _OPENMP | |
unsigned int nThreadID = omp_get_thread_num(); | |
unsigned int nNumOfThreads = omp_get_num_threads(); | |
#else | |
unsigned int nThreadID = 0; | |
unsigned int nNumOfThreads = 1; | |
#endif | |
nTemp = nCT * nNumOfWords; | |
for (nCK = nThreadID; nCK < nNumOfWords; nCK += nNumOfThreads) | |
for (nCC = 0; nCC < nVecDim[nCN]; nCC ++) | |
dGWOut[nCN][nVecDim[nCN] * nCK + nCC] | |
+= dH[nCN][nVecDim[nCN] * nCT + nCC] * dGYHat[nTemp + nCK]; | |
for (nCC = nThreadID; nCC < nVecDim[nCN]; nCC += nNumOfThreads) { | |
nTemp = nVecDim[nCN] * nCT + nCC; | |
nTemp2 = nCT * nNumOfWords; | |
dGH[nCN][nTemp] = 0.0; | |
for (nCK = 0; nCK < nNumOfWords; nCK ++) | |
dGH[nCN][nTemp] += dWOut[nCN][nVecDim[nCN] * nCK + nCC] | |
* dGYHat[nTemp2 + nCK]; | |
if (nCN < nLayerNum - 1) { | |
nCNN = nCN + 1; | |
nTemp2 = nCT * nVecDim[nCNN]; | |
nTemp3 = nNumOfWords + nCC; | |
for (nC = 0; nC < nVecDim[nCNN]; nC ++) { | |
dGH[nCN][nTemp] += dWI[nCNN][nC][nTemp3] * dGAI[nCNN][nTemp2 + nC]; | |
dGH[nCN][nTemp] += dWP[nCNN][nC][nTemp3] * dGAP[nCNN][nTemp2 + nC]; | |
dGH[nCN][nTemp] += dWE[nCNN][nC][nTemp3] * dGAE[nCNN][nTemp2 + nC]; | |
dGH[nCN][nTemp] += dWO[nCNN][nC][nTemp3] * dGAO[nCNN][nTemp2 + nC]; | |
} | |
} | |
if (nCT < nLen - 1) { | |
nTemp2 = (nCT + 1) * nVecDim[nCN]; | |
nTemp3 = nNumOfWords + nCC; | |
for (nC = 0; nC < nVecDim[nCN]; nC ++) { | |
if (nCN > 0) nTemp3 += nVecDim[nCN - 1]; | |
dGH[nCN][nTemp] += dWI[nCN][nC][nTemp3] * dGAI[nCN][nTemp2 + nC]; | |
dGH[nCN][nTemp] += dWP[nCN][nC][nTemp3] * dGAP[nCN][nTemp2 + nC]; | |
dGH[nCN][nTemp] += dWE[nCN][nC][nTemp3] * dGAE[nCN][nTemp2 + nC]; | |
dGH[nCN][nTemp] += dWO[nCN][nC][nTemp3] * dGAO[nCN][nTemp2 + nC]; | |
} | |
} | |
dTemp = _tanh(dE[nCN][nTemp]); | |
dGAO[nCN][nTemp] = dGH[nCN][nTemp] | |
* dTemp * dO[nCN][nTemp] * (1.0 - dO[nCN][nTemp]); | |
nTemp2 = nNumOfWords + nVecDim[nCN]; | |
if (nCN > 0) nTemp2 += nVecDim[nCN - 1]; | |
dGE[nCN][nTemp] = dWO[nCN][nCC][nTemp2] * dGAO[nCN][nTemp]; | |
dGE[nCN][nTemp] += dO[nCN][nTemp] * (1.0 - dTemp * dTemp) * dGH[nCN][nTemp]; | |
if (nCT < nLen - 1) { | |
nTemp3 = nVecDim[nCN] * (nCT + 1) + nCC; | |
dGE[nCN][nTemp] += dP[nCN][nTemp3] * dGE[nCN][nTemp3]; | |
dGE[nCN][nTemp] += dWP[nCN][nCC][nTemp2] * dGAP[nCN][nTemp3]; | |
dGE[nCN][nTemp] += dWI[nCN][nCC][nTemp2] * dGAI[nCN][nTemp3]; | |
} | |
dTemp2 = _tanh(dAE[nCN][nTemp]); | |
dGAE[nCN][nTemp] = dI[nCN][nTemp] * (1.0 - dTemp2 * dTemp2) * dGE[nCN][nTemp]; | |
dGAP[nCN][nTemp] = 0.0; | |
if (nCT > 0) | |
dGAP[nCN][nTemp] += dP[nCN][nTemp] * (1.0 - dP[nCN][nTemp]) | |
* dE[nCN][nVecDim[nCN] * (nCT - 1) + nCC] * dGE[nCN][nTemp]; | |
dGAI[nCN][nTemp] = dI[nCN][nTemp] * (1.0 - dI[nCN][nTemp]) * dTemp * dGE[nCN][nTemp]; | |
} | |
} | |
} while (nCN > 0); | |
} while (nCT > 0); | |
return; | |
} | |
void weight(unsigned long long nCD) | |
{ | |
unsigned int nLen; | |
unsigned int nCN, nCNN; | |
nLen = nDocLen[nCD] + 1; | |
for (nCN = 0; nCN < nLayerNum; nCN ++) { | |
nCNN = 0; | |
if (nCN > 0) nCNN = nCN - 1; | |
#pragma omp parallel | |
{ | |
unsigned int nCT, nCK, nCC, nC; | |
unsigned int nTemp, nTemp2, nTempW; | |
real dTempI, dTempP, dTempE, dTempO; | |
#ifdef _OPENMP | |
unsigned int nThreadID = omp_get_thread_num(); | |
unsigned int nNumOfThreads = omp_get_num_threads(); | |
#else | |
unsigned int nThreadID = 0; | |
unsigned int nNumOfThreads = 1; | |
#endif | |
for (nCC = nThreadID; nCC < nVecDim[nCN]; nCC += nNumOfThreads) { | |
for (nC = 0; nC < nWSizeI[nCN]; nC ++) | |
dGWI[nCN][nCC][nC] = 0.0; | |
for (nC = 0; nC < nWSizeP[nCN]; nC ++) | |
dGWP[nCN][nCC][nC] = 0.0; | |
for (nC = 0; nC < nWSizeE[nCN]; nC ++) | |
dGWE[nCN][nCC][nC] = 0.0; | |
for (nC = 0; nC < nWSizeO[nCN]; nC ++) | |
dGWO[nCN][nCC][nC] = 0.0; | |
for (nCT = 0; nCT < nLen; nCT ++) { | |
nTemp = nCT * nVecDim[nCN] + nCC; | |
dTempI = dGAI[nCN][nTemp]; | |
dTempP = dGAP[nCN][nTemp]; | |
dTempE = dGAE[nCN][nTemp]; | |
dTempO = dGAO[nCN][nTemp]; | |
if (nCT > 0) nCK = nWordSeq[nCD][nCT - 1]; | |
else nCK = nBorder; | |
dGWI[nCN][nCC][nCK] += dTempI; | |
dGWP[nCN][nCC][nCK] += dTempP; | |
dGWE[nCN][nCC][nCK] += dTempE; | |
dGWO[nCN][nCC][nCK] += dTempO; | |
nTempW = nNumOfWords; | |
if (nCN > 0) { | |
nTemp2 = nCT * nVecDim[nCNN]; | |
for (nC = 0; nC < nVecDim[nCNN]; nC ++) { | |
dGWI[nCN][nCC][nTempW + nC] += dTempI * dH[nCNN][nTemp2 + nC]; | |
dGWP[nCN][nCC][nTempW + nC] += dTempP * dH[nCNN][nTemp2 + nC]; | |
dGWE[nCN][nCC][nTempW + nC] += dTempE * dH[nCNN][nTemp2 + nC]; | |
dGWO[nCN][nCC][nTempW + nC] += dTempO * dH[nCNN][nTemp2 + nC]; | |
} | |
nTempW += nVecDim[nCNN]; | |
} | |
if (nCT > 0) { | |
nTemp2 = (nCT - 1) * nVecDim[nCN]; | |
for (nC = 0; nC < nVecDim[nCN]; nC ++) { | |
dGWI[nCN][nCC][nTempW + nC] += dTempI * dH[nCN][nTemp2 + nC]; | |
dGWP[nCN][nCC][nTempW + nC] += dTempP * dH[nCN][nTemp2 + nC]; | |
dGWE[nCN][nCC][nTempW + nC] += dTempE * dH[nCN][nTemp2 + nC]; | |
dGWO[nCN][nCC][nTempW + nC] += dTempO * dH[nCN][nTemp2 + nC]; | |
} | |
} | |
nTempW += nVecDim[nCN]; | |
if (nCT > 0) { | |
nTemp2 = (nCT - 1) * nVecDim[nCN]; | |
dGWI[nCN][nCC][nTempW] += dTempI * dE[nCN][nTemp2 + nCC]; | |
dGWP[nCN][nCC][nTempW] += dTempP * dE[nCN][nTemp2 + nCC]; | |
} | |
dGWO[nCN][nCC][nTempW] += dTempO * dE[nCN][nCT * nVecDim[nCN] + nCC]; | |
dGWI[nCN][nCC][nTempW + 1] += dTempI; | |
dGWP[nCN][nCC][nTempW + 1] += dTempP; | |
dGWE[nCN][nCC][nTempW] += dTempE; | |
dGWO[nCN][nCC][nTempW + 1] += dTempO; | |
} | |
} | |
} | |
} | |
#pragma omp parallel | |
{ | |
unsigned int nCN, nCK, nCC, nC; | |
real dTemp; | |
#ifdef _OPENMP | |
unsigned int nThreadID = omp_get_thread_num(); | |
unsigned int nNumOfThreads = omp_get_num_threads(); | |
#else | |
unsigned int nThreadID = 0; | |
unsigned int nNumOfThreads = 1; | |
#endif | |
for (nCN = 0; nCN < nLayerNum; nCN ++) { | |
for (nCC = nThreadID; nCC < nVecDim[nCN]; nCC += nNumOfThreads) { | |
for (nC = 0; nC < nWSizeI[nCN]; nC ++) { | |
dTemp = dWI[nCN][nCC][nC] + dLearningRate * dGWI[nCN][nCC][nC]; | |
if (isfinite(dTemp)) dWI[nCN][nCC][nC] = dTemp; | |
} | |
for (nC = 0; nC < nWSizeP[nCN]; nC ++) { | |
dTemp = dWP[nCN][nCC][nC] + dLearningRate * dGWP[nCN][nCC][nC]; | |
if (isfinite(dTemp)) dWP[nCN][nCC][nC] = dTemp; | |
} | |
for (nC = 0; nC < nWSizeE[nCN]; nC ++) { | |
dTemp = dWE[nCN][nCC][nC] + dLearningRate * dGWE[nCN][nCC][nC]; | |
if (isfinite(dTemp)) dWE[nCN][nCC][nC] = dTemp; | |
} | |
for (nC = 0; nC < nWSizeO[nCN]; nC ++) { | |
dTemp = dWO[nCN][nCC][nC] + dLearningRate * dGWO[nCN][nCC][nC]; | |
if (isfinite(dTemp)) dWO[nCN][nCC][nC] = dTemp; | |
} | |
} | |
for (nCC = nThreadID; nCC < nNumOfWords * nVecDim[nCN]; nCC += nNumOfThreads) { | |
dTemp = dWOut[nCN][nCC] + dLearningRate * dGWOut[nCN][nCC]; | |
if (isfinite(dTemp)) dWOut[nCN][nCC] = dTemp; | |
} | |
} | |
for (nCK = nThreadID; nCK < nNumOfWords; nCK += nNumOfThreads) { | |
dTemp = dBiasOut[nCK] + dLearningRate * dGBiasOut[nCK]; | |
if (isfinite(dTemp)) dBiasOut[nCK] = dTemp; | |
} | |
} | |
return; | |
} | |
//-------- | |
int main(int argc, char **argv) | |
{ | |
char sBuff[BUFFSIZE]; | |
unsigned int nWordID; | |
char sTempWord[TOKENLEN], sTempDocName[TOKENLEN], sPrevDocName[TOKENLEN]; | |
unsigned long long nCD; | |
unsigned int nCN, nCC; | |
real dLogLH; | |
real dTemp; | |
fOut = stdout; | |
if (initHash()) { fprintf(stderr, "%s : initHash()\n", argv[0]); exit(1); } | |
nNumOfDocs = 0; | |
nDocLen = (unsigned int *) malloc(sizeof(unsigned int)); | |
nWordSeq = (unsigned int **) malloc(sizeof(unsigned int *)); | |
memset(sPrevDocName, 0, TOKENLEN); | |
while (! feof(stdin)) { | |
memset(sBuff, 0, BUFFSIZE); | |
fgets(sBuff, BUFFSIZE - 1, stdin); | |
memset(sTempDocName, 0, TOKENLEN); | |
memset(sTempWord, 0, TOKENLEN); | |
if (sscanf(sBuff, "%s %s\n", sTempDocName, sTempWord) == 2) { | |
if (strcmp(sPrevDocName, sTempDocName) != 0) { | |
nNumOfDocs ++; | |
nDocLen = (unsigned int *) realloc(nDocLen, sizeof(unsigned int) * nNumOfDocs); | |
nWordSeq = (unsigned int **) realloc(nWordSeq, sizeof(unsigned int *) * nNumOfDocs); | |
nDocLen[nNumOfDocs - 1] = 0; | |
nWordSeq[nNumOfDocs - 1] = (unsigned int *) malloc(sizeof(unsigned int)); | |
} | |
nWordID = addWord(sTempWord); | |
if (nWordID > 0) { | |
nDocLen[nNumOfDocs - 1] ++; | |
nWordSeq[nNumOfDocs - 1] | |
= (unsigned int *) realloc(nWordSeq[nNumOfDocs - 1], | |
sizeof(unsigned int) * nDocLen[nNumOfDocs - 1]); | |
nWordSeq[nNumOfDocs - 1][nDocLen[nNumOfDocs - 1] - 1] = nWordID - 1; | |
} else { fprintf(stderr, "Invalid data : %s\n", sTempWord); exit(1); } | |
} | |
memset(sPrevDocName, 0, TOKENLEN); | |
strcpy(sPrevDocName, sTempDocName); | |
} | |
if (nNumOfDocs == 0) { fprintf(stderr, "no valid data.\n"); exit(1); } | |
nBorder = addWord(BORDER); | |
nBorder --; | |
nMaxDocLen = 0; | |
for (nCD = 0; nCD < nNumOfDocs; nCD ++) | |
if (nMaxDocLen < nDocLen[nCD]) nMaxDocLen = nDocLen[nCD]; | |
nMaxDocLen ++; | |
//---------------- | |
dSgmTable = (real *) malloc(sizeof(real) * TABLE_SIZE); | |
dSgmTableFactor = ((real) TABLE_SIZE - 1) / (SGM_THRESHOLD * 2); | |
dTanhTable = (real *) malloc(sizeof(real) * TABLE_SIZE); | |
dTanhTableFactor = ((real) TABLE_SIZE - 1) / (TANH_THRESHOLD * 2); | |
for (nCC = 0; nCC < TABLE_SIZE; nCC ++) { | |
dTemp = exp(((real) nCC) / dSgmTableFactor - SGM_THRESHOLD); | |
dSgmTable[nCC] = dTemp / (1.0 + dTemp); | |
dTemp = exp(((real) nCC) / dTanhTableFactor - TANH_THRESHOLD); | |
dTanhTable[nCC] = (dTemp - 1.0 / dTemp) / (dTemp + 1.0 / dTemp); | |
} | |
//---------------- | |
nLayerNum = LAYER_NUM; | |
nVecDim = (unsigned int *) malloc(sizeof(unsigned int) * nLayerNum); | |
dWOut = (real **) malloc(sizeof(real *) * nLayerNum); | |
dGWOut = (real **) malloc(sizeof(real *) * nLayerNum); | |
for (nCN = 0; nCN < nLayerNum; nCN ++) { | |
nVecDim[nCN] = VEC_DIM; | |
dWOut[nCN] = (real *) malloc(sizeof(real) * nNumOfWords * nVecDim[nCN]); | |
dGWOut[nCN] = (real *) malloc(sizeof(real) * nNumOfWords * nVecDim[nCN]); | |
} | |
dBiasOut = (real *) malloc(sizeof(real) * nNumOfWords); | |
dGBiasOut = (real *) malloc(sizeof(real) * nNumOfWords); | |
dY = (real *) malloc(sizeof(real) * nMaxDocLen * nNumOfWords); | |
dYHat = (real *) malloc(sizeof(real) * nMaxDocLen * nNumOfWords); | |
dGYHat = (real *) malloc(sizeof(real) * nMaxDocLen * nNumOfWords); | |
dI = (real **) malloc(sizeof(real *) * nLayerNum); | |
dP = (real **) malloc(sizeof(real *) * nLayerNum); | |
dE = (real **) malloc(sizeof(real *) * nLayerNum); | |
dO = (real **) malloc(sizeof(real *) * nLayerNum); | |
dH = (real **) malloc(sizeof(real *) * nLayerNum); | |
dAI = (real **) malloc(sizeof(real *) * nLayerNum); | |
dAP = (real **) malloc(sizeof(real *) * nLayerNum); | |
dAE = (real **) malloc(sizeof(real *) * nLayerNum); | |
dAO = (real **) malloc(sizeof(real *) * nLayerNum); | |
dGH = (real **) malloc(sizeof(real *) * nLayerNum); | |
dGE = (real **) malloc(sizeof(real *) * nLayerNum); | |
dGAI = (real **) malloc(sizeof(real *) * nLayerNum); | |
dGAP = (real **) malloc(sizeof(real *) * nLayerNum); | |
dGAE = (real **) malloc(sizeof(real *) * nLayerNum); | |
dGAO = (real **) malloc(sizeof(real *) * nLayerNum); | |
for (nCN = 0; nCN < nLayerNum; nCN ++) { | |
dI[nCN] = (real *) malloc(sizeof(real) * nVecDim[nCN] * nMaxDocLen); | |
dP[nCN] = (real *) malloc(sizeof(real) * nVecDim[nCN] * nMaxDocLen); | |
dE[nCN] = (real *) malloc(sizeof(real) * nVecDim[nCN] * nMaxDocLen); | |
dO[nCN] = (real *) malloc(sizeof(real) * nVecDim[nCN] * nMaxDocLen); | |
dH[nCN] = (real *) malloc(sizeof(real) * nVecDim[nCN] * nMaxDocLen); | |
dAI[nCN] = (real *) malloc(sizeof(real) * nVecDim[nCN] * nMaxDocLen); | |
dAP[nCN] = (real *) malloc(sizeof(real) * nVecDim[nCN] * nMaxDocLen); | |
dAE[nCN] = (real *) malloc(sizeof(real) * nVecDim[nCN] * nMaxDocLen); | |
dAO[nCN] = (real *) malloc(sizeof(real) * nVecDim[nCN] * nMaxDocLen); | |
dGH[nCN] = (real *) malloc(sizeof(real) * nVecDim[nCN] * nMaxDocLen); | |
dGE[nCN] = (real *) malloc(sizeof(real) * nVecDim[nCN] * nMaxDocLen); | |
dGAI[nCN] = (real *) malloc(sizeof(real) * nVecDim[nCN] * nMaxDocLen); | |
dGAP[nCN] = (real *) malloc(sizeof(real) * nVecDim[nCN] * nMaxDocLen); | |
dGAE[nCN] = (real *) malloc(sizeof(real) * nVecDim[nCN] * nMaxDocLen); | |
dGAO[nCN] = (real *) malloc(sizeof(real) * nVecDim[nCN] * nMaxDocLen); | |
} | |
nWSizeI = (unsigned int *) malloc(sizeof(unsigned int) * nLayerNum); | |
nWSizeP = (unsigned int *) malloc(sizeof(unsigned int) * nLayerNum); | |
nWSizeE = (unsigned int *) malloc(sizeof(unsigned int) * nLayerNum); | |
nWSizeO = (unsigned int *) malloc(sizeof(unsigned int) * nLayerNum); | |
for (nCN = 0; nCN < nLayerNum; nCN ++) { | |
nWSizeI[nCN] = nNumOfWords + nVecDim[nCN] + 2; | |
if (nCN > 0) nWSizeI[nCN] += nVecDim[nCN - 1]; | |
nWSizeP[nCN] = nNumOfWords + nVecDim[nCN] + 2; | |
if (nCN > 0) nWSizeP[nCN] += nVecDim[nCN - 1]; | |
nWSizeE[nCN] = nNumOfWords + nVecDim[nCN] + 1; | |
if (nCN > 0) nWSizeE[nCN] += nVecDim[nCN - 1]; | |
nWSizeO[nCN] = nNumOfWords + nVecDim[nCN] + 2; | |
if (nCN > 0) nWSizeO[nCN] += nVecDim[nCN - 1]; | |
} | |
dWI = (real ***) malloc(sizeof(real **) * nLayerNum); | |
dWP = (real ***) malloc(sizeof(real **) * nLayerNum); | |
dWE = (real ***) malloc(sizeof(real **) * nLayerNum); | |
dWO = (real ***) malloc(sizeof(real **) * nLayerNum); | |
dGWI = (real ***) malloc(sizeof(real **) * nLayerNum); | |
dGWP = (real ***) malloc(sizeof(real **) * nLayerNum); | |
dGWE = (real ***) malloc(sizeof(real **) * nLayerNum); | |
dGWO = (real ***) malloc(sizeof(real **) * nLayerNum); | |
for (nCN = 0; nCN < nLayerNum; nCN ++) { | |
dWI[nCN] = (real **) malloc(sizeof(real *) * nVecDim[nCN]); | |
for (nCC = 0; nCC < nVecDim[nCN]; nCC ++) | |
dWI[nCN][nCC] = (real *) malloc(sizeof(real) * nWSizeI[nCN]); | |
dWP[nCN] = (real **) malloc(sizeof(real *) * nVecDim[nCN]); | |
for (nCC = 0; nCC < nVecDim[nCN]; nCC ++) | |
dWP[nCN][nCC] = (real *) malloc(sizeof(real) * nWSizeP[nCN]); | |
dWE[nCN] = (real **) malloc(sizeof(real *) * nVecDim[nCN]); | |
for (nCC = 0; nCC < nVecDim[nCN]; nCC ++) | |
dWE[nCN][nCC] = (real *) malloc(sizeof(real) * nWSizeE[nCN]); | |
dWO[nCN] = (real **) malloc(sizeof(real *) * nVecDim[nCN]); | |
for (nCC = 0; nCC < nVecDim[nCN]; nCC ++) | |
dWO[nCN][nCC] = (real *) malloc(sizeof(real) * nWSizeO[nCN]); | |
dGWI[nCN] = (real **) malloc(sizeof(real *) * nVecDim[nCN]); | |
for (nCC = 0; nCC < nVecDim[nCN]; nCC ++) | |
dGWI[nCN][nCC] = (real *) malloc(sizeof(real) * nWSizeI[nCN]); | |
dGWP[nCN] = (real **) malloc(sizeof(real *) * nVecDim[nCN]); | |
for (nCC = 0; nCC < nVecDim[nCN]; nCC ++) | |
dGWP[nCN][nCC] = (real *) malloc(sizeof(real) * nWSizeP[nCN]); | |
dGWE[nCN] = (real **) malloc(sizeof(real *) * nVecDim[nCN]); | |
for (nCC = 0; nCC < nVecDim[nCN]; nCC ++) | |
dGWE[nCN][nCC] = (real *) malloc(sizeof(real) * nWSizeE[nCN]); | |
dGWO[nCN] = (real **) malloc(sizeof(real *) * nVecDim[nCN]); | |
for (nCC = 0; nCC < nVecDim[nCN]; nCC ++) | |
dGWO[nCN][nCC] = (real *) malloc(sizeof(real) * nWSizeO[nCN]); | |
} | |
// print statistics | |
fprintf(fOut, "# %llu docs", nNumOfDocs); | |
fprintf(fOut, ", %u words", nNumOfWords); | |
for (nCN = 0; nCN < nLayerNum; nCN ++) | |
fprintf(fOut, ", %u units (%u)", nVecDim[nCN], nCN); | |
fprintf(fOut, "\n"); | |
fflush(fOut); | |
initParam(); | |
nIteration = 1; | |
dLearningRate = LEARNING_RATE; | |
time(&start_time); | |
while (1) { | |
nNumOfSamples = 0; | |
dLogLH = 0.0; | |
dTotalLength = 0.0; | |
for (nCD = 0; nCD < nNumOfDocs; nCD ++) { | |
dLogLH += forward(nCD); | |
backward(nCD); | |
weight(nCD); | |
nNumOfSamples ++; | |
dTotalLength += (real) (nDocLen[nCD] + 1); | |
if (nNumOfSamples % 100 == 0) { | |
fprintf(fOut, "## %d %llu ", nIteration, nNumOfSamples); | |
fprintf(fOut, "%.0f ", dTotalLength); | |
fprintf(fOut, "%.3f ", exp(- dLogLH / dTotalLength)); | |
put_time(fOut); | |
fprintf(fOut, "\n"); | |
fflush(fOut); | |
} | |
} | |
nIteration ++; | |
printParam(); | |
} | |
exit(0); | |
} |
素晴らしいですね。ボクもやっと多層パーセプトロンを実装したところです。エンジニアでなくても子供でもできるAIの教育ツールを作っています。そのためにGUIを強化し、マウス1つで操作できるように設計しています。画像の教師が終わったので、RNNやLSTMに移行し、時系列データーや文章データーを扱いたいと考えています。いろいろと参考にさせてください。ありがとうございます。よろしくお願いいたします。
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
How to complie:
gcc -Wall -O3 -msse2 -DHAVE_SSE2 -o lstm_omp lstm_omp.c -lm -fopenmp -lgomp