I am recording my AlexNet C implementation for Intel SGX. Because Intel SGX enclave interfaces can only be written in pure C, even C++ would not work here. Assume the dataset is MNIST, where each black-and-white image is a (1, 28, 28) matrix. In this implementation the model parameters are held in 4-dimensional arrays (input channel × output channel × kernel height × kernel width), while the input/output feature maps that record the intermediate products during learning are 3-dimensional arrays (channel × height × width). Note that each channel of the input matrix owns an independent group of kernels, but the kernel parameter updates of every channel share the same total final output of this layer.

Detailed operations such as convolution, backward propagation, activation, and pooling were already explained in the last blog, The pure C implementation for LeNet-5, so they are omitted here.

The visualization of the AlexNet structure


[1] The structure of AlexNet

AlexNet structure C definition

typedef struct AlexNet
{
	double weight1[INPUT][LAYER1][LENGTH_KERNEL][LENGTH_KERNEL];//Layer1 kernel
	double weight2[LAYER1][LAYER2][LENGTH_KERNEL][LENGTH_KERNEL];//Layer2 Kernel
	double weight3[LAYER2][LAYER3][LENGTH_KERNEL][LENGTH_KERNEL];//Layer3 Kernel
	double weight4[LAYER3][LAYER4][LENGTH_KERNEL][LENGTH_KERNEL];//Layer4 Kernel
	double weight5[LAYER4][LAYER5][LENGTH_KERNEL][LENGTH_KERNEL];//Layer5 Kernel

	double fc1[LAYER5*LENGTH_FEATURE5_2*LENGTH_FEATURE5_2][FC1_OUTPUT];
	double fc2[FC1_OUTPUT][FC2_OUTPUT];
	double fc3[FC2_OUTPUT][FC3_OUTPUT];

	double bias1[LAYER1];
	double bias2[LAYER2];
	double bias3[LAYER3];
	double bias4[LAYER4];
	double bias5[LAYER5];

	double bias_fc1[FC1_OUTPUT];
	double bias_fc2[FC2_OUTPUT];
	double bias_fc3[FC3_OUTPUT];

}AlexNet;
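
As a reminder of how these 4-dimensional weight arrays are indexed (the actual convolution macros are in the LeNet-5 post and are not repeated here), below is a minimal sketch of the first convolution layer. It is not the original macro code; it only illustrates that every input channel has its own kernel per output channel, while all input channels accumulate into the same output feature map.

```C
/* Sketch only: plain-loop view of how weight1[INPUT][LAYER1][K][K] is used.
   The valid region is computed without padding for brevity; the real code
   pads the input so the 28x28 size is preserved, and applies the activation
   afterwards. */
static void conv1_sketch(double in[INPUT][LENGTH_FEATURE0][LENGTH_FEATURE0],
                         double out[LAYER1][LENGTH_FEATURE1_1][LENGTH_FEATURE1_1],
                         double weight[INPUT][LAYER1][LENGTH_KERNEL][LENGTH_KERNEL],
                         double bias[LAYER1])
{
	for (int o = 0; o < LAYER1; ++o)
		for (int y = 0; y + LENGTH_KERNEL <= LENGTH_FEATURE0; ++y)
			for (int x = 0; x + LENGTH_KERNEL <= LENGTH_FEATURE0; ++x) {
				double sum = bias[o];
				for (int i = 0; i < INPUT; ++i)   /* each input channel has its own kernel */
					for (int ky = 0; ky < LENGTH_KERNEL; ++ky)
						for (int kx = 0; kx < LENGTH_KERNEL; ++kx)
							sum += in[i][y + ky][x + kx] * weight[i][o][ky][kx];
				out[o][y][x] = sum;               /* all input channels share this output map */
			}
}
```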

The data structure of the input and intermediate outputs during learning

    typedef struct Feature
    {
        double input[INPUT][LENGTH_FEATURE0][LENGTH_FEATURE0];

        double layer1_conv[LAYER1][LENGTH_FEATURE1_1][LENGTH_FEATURE1_1];
        double layer1_pool[LAYER1][LENGTH_FEATURE1_2][LENGTH_FEATURE1_2];

        double layer2_conv[LAYER2][LENGTH_FEATURE2_1][LENGTH_FEATURE2_1];
        double layer2_pool[LAYER2][LENGTH_FEATURE2_2][LENGTH_FEATURE2_2];

        double layer3_conv[LAYER3][LENGTH_FEATURE3_1][LENGTH_FEATURE3_1];
        
        double layer4_conv[LAYER4][LENGTH_FEATURE4_1][LENGTH_FEATURE4_1];


        double layer5_conv[LAYER5][LENGTH_FEATURE5_1][LENGTH_FEATURE5_1];
        double layer5_pool[LAYER5][LENGTH_FEATURE5_2][LENGTH_FEATURE5_2];

        double fc1[FC1_OUTPUT];
        double fc2[FC2_OUTPUT];
        double fc3[FC3_OUTPUT];

        double output[OUTPUT];
    }Feature;

The header macros defining AlexNet's hyper-parameters

    #pragma once
    #define LENGTH_KERNEL	3
    #define MAXPOOL_SIZE 2
    #define PADDING 1
    #define LENGTH_FEATURE0	28
    #define LENGTH_FEATURE1_1	(LENGTH_FEATURE0 - LENGTH_KERNEL + 2*PADDING+1) //28-3+2+1 ->28*28
    #define LENGTH_FEATURE1_2	(LENGTH_FEATURE1_1/2) //14*14
    #define LENGTH_FEATURE2_1	(LENGTH_FEATURE1_2 - LENGTH_KERNEL +2*PADDING + 1) //14-3+2+1=14
    #define	LENGTH_FEATURE2_2	(LENGTH_FEATURE2_1/2)  //7*7
    #define LENGTH_FEATURE3_1	(LENGTH_FEATURE2_2 - LENGTH_KERNEL +2*PADDING+ 1) //7*7
    #define LENGTH_FEATURE4_1	(LENGTH_FEATURE3_1 - LENGTH_KERNEL +2*PADDING+ 1)//7*7
    #define MAXPOOL5_2_KERNEL 3
    #define MAXPOOL5_2_STRIDE 2

    #define LENGTH_FEATURE5_1	(LENGTH_FEATURE4_1 - LENGTH_KERNEL+2*PADDING+ 1)	//7*7
    #define LENGTH_FEATURE5_2	((LENGTH_FEATURE5_1 - MAXPOOL5_2_KERNEL)/MAXPOOL5_2_STRIDE+ 1) //(7-3)/2 +1 = 3 no padding

    #define FC1_OUTPUT	1024
    #define FC2_OUTPUT	512
    #define FC3_OUTPUT	10


    #define INPUT			1
    #define LAYER1			32
    #define LAYER2			64
    #define LAYER3			128
    #define LAYER4			256
    #define LAYER5			256

    #define OUTPUT          10

    #define ALPHA 0.5


    typedef unsigned char uint8;
    typedef uint8 image[28][28];
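
To sanity-check the macro arithmetic above, a few C11 _Static_assert lines can be dropped into the header. This is only a verification sketch, not part of the original header:

```C
/* Verification sketch: confirm the feature-map sizes computed by the macros. */
_Static_assert(LENGTH_FEATURE1_1 == 28, "conv1 keeps 28x28 with padding 1");
_Static_assert(LENGTH_FEATURE1_2 == 14, "pool1 halves to 14x14");
_Static_assert(LENGTH_FEATURE2_2 == 7,  "pool2 halves to 7x7");
_Static_assert(LENGTH_FEATURE5_1 == 7,  "conv3/4/5 keep 7x7 with padding 1");
_Static_assert(LENGTH_FEATURE5_2 == 3,  "3x3 max-pool with stride 2 gives 3x3");
```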

Forward

static void forward(AlexNet *alexnet, double(*action)(double))
{
	CONVOLUTION_FORWARD(features.input, features.layer1_conv, alexnet->weight1, alexnet->bias1, action);
	SUBSAMP_MAX_FORWARD(features.layer1_conv, features.layer1_pool);

	CONVOLUTION_FORWARD(features.layer1_pool, features.layer2_conv, alexnet->weight2, alexnet->bias2, action);
	SUBSAMP_MAX_FORWARD(features.layer2_conv, features.layer2_pool);

	CONVOLUTION_FORWARD(features.layer2_pool, features.layer3_conv, alexnet->weight3, alexnet->bias3, action);
	
	CONVOLUTION_FORWARD(features.layer3_conv, features.layer4_conv, alexnet->weight4, alexnet->bias4, action);

	CONVOLUTION_FORWARD(features.layer4_conv, features.layer5_conv, alexnet->weight5, alexnet->bias5, action);
	SUBSAMP_MAX_FORWARD(features.layer5_conv, features.layer5_pool);

	// Matrix_FC(features->layer5_pool, features->fc1, alexnet->fc1);
	// FC(features->fc1, features->fc2, alexnet->fc2);
	// FC(features->fc2, features->fc3, alexnet->fc3);
	DOT_PRODUCT_FORWARD(features.layer5_pool, features.fc1, alexnet->fc1, alexnet->bias_fc1, action);
	DOT_PRODUCT_FORWARD(features.fc1, features.fc2, alexnet->fc2, alexnet->bias_fc2, action);
	DOT_PRODUCT_FORWARD(features.fc2, features.output, alexnet->fc3, alexnet->bias_fc3, action);
}
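
Note that forward() accesses features with the dot operator, so it assumes a file-scope Feature object instead of a parameter. A minimal sketch of the assumed globals and the activation pair might look like the following; the names features, errors, deltas, relu, and relugrad are my assumptions here, not code from the implementation:

```C
/* Assumed file-scope state (hypothetical names, following the dot access above). */
static Feature features;   /* activations produced by forward()              */
static Feature errors;     /* back-propagated errors, same shapes            */
static AlexNet deltas;     /* per-parameter gradients filled by backward()   */

/* The activation is passed as a function pointer; ReLU is one possible choice. */
static double relu(double x)     { return x > 0.0 ? x : 0.0; }
static double relugrad(double y) { return y > 0.0 ? 1.0 : 0.0; }

/* Typical call sequence per sample:
   forward(&alexnet, relu);  ...  backward(&alexnet, relugrad); */
```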

Backward

    static void backward(AlexNet *alexnet, double(*actiongrad)(double))
    {
        //printf("Here is Backward\n");
        DOT_PRODUCT_BACKWARD(features.fc2, errors.fc2, errors.output, alexnet->fc3, deltas.fc3, deltas.bias_fc3, actiongrad);
        DOT_PRODUCT_BACKWARD(features.fc1, errors.fc1, errors.fc2, alexnet->fc2, deltas.fc2, deltas.bias_fc2, actiongrad);
        DOT_PRODUCT_BACKWARD(features.layer5_pool, errors.layer5_pool, errors.fc1, alexnet->fc1, deltas.fc1, deltas.bias_fc1, actiongrad);
        
        SUBSAMP_MAX_BACKWARD(features.layer5_conv, errors.layer5_conv, errors.layer5_pool);
        
        CONVOLUTION_BACKWARD(features.layer4_conv, errors.layer4_conv, errors.layer5_conv, alexnet->weight5, deltas.weight5, deltas.bias5, actiongrad);

        CONVOLUTION_BACKWARD(features.layer3_conv, errors.layer3_conv, errors.layer4_conv, alexnet->weight4, deltas.weight4, deltas.bias4, actiongrad);

        CONVOLUTION_BACKWARD(features.layer2_pool, errors.layer2_pool, errors.layer3_conv, alexnet->weight3, deltas.weight3, deltas.bias3, actiongrad);
        SUBSAMP_MAX_BACKWARD(features.layer2_conv, errors.layer2_conv, errors.layer2_pool);

        CONVOLUTION_BACKWARD(features.layer1_pool, errors.layer1_pool, errors.layer2_conv, alexnet->weight2, deltas.weight2, deltas.bias2, actiongrad);
        SUBSAMP_MAX_BACKWARD(features.layer1_conv, errors.layer1_conv, errors.layer1_pool);

        CONVOLUTION_BACKWARD(features.input, errors.input, errors.layer1_conv, alexnet->weight1, deltas.weight1, deltas.bias1, actiongrad);
    }


Because of the memory limitation, I eliminated the buffer[] here, which also saves execution time and resources but decreases the code readability. It could probably be eliminated in LeNet-5 as well, when necessary.
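
With the batch buffer gone, the deltas filled in by backward() can be applied to the model directly. A minimal sketch of that update, assuming the same flat-array trick as the LeNet-5 reference (both structs contain only doubles, so they can be walked as one contiguous array) and that deltas already points in the descent direction:

```C
#include <stddef.h>

/* Sketch: apply the accumulated gradients with learning rate ALPHA.
   AlexNet holds nothing but doubles, so the whole struct can be treated
   as a flat parameter vector (the same trick the tests below poke at). */
static void update_parameters(AlexNet *alexnet, AlexNet *deltas, double alpha)
{
	double *w = (double *)alexnet;
	double *g = (double *)deltas;
	for (size_t i = 0; i < sizeof(AlexNet) / sizeof(double); ++i)
		w[i] += alpha * g[i];
}
```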

The max-pool backward pass does not record the exact position of the maximum inside the input matrix; it simply routes the gradient to the start point of the original pooling window.
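
A minimal sketch of that simplification for the first pooling layer, under hypothetical names (the real macros live in the LeNet-5 post): instead of locating the argmax, the whole output gradient is written to the top-left corner of each pooling window.

```C
/* Sketch of the simplified max-pool backward described above. Only the start
   point of each 2x2 window receives the output gradient; the true argmax
   position is not tracked. Fixed sizes are for layer 1 and illustrative only. */
static void subsamp_max_backward_sketch(
	double inerror[LAYER1][LENGTH_FEATURE1_1][LENGTH_FEATURE1_1],
	double outerror[LAYER1][LENGTH_FEATURE1_2][LENGTH_FEATURE1_2])
{
	for (int c = 0; c < LAYER1; ++c)
		for (int o0 = 0; o0 < LENGTH_FEATURE1_2; ++o0)
			for (int o1 = 0; o1 < LENGTH_FEATURE1_2; ++o1)
				inerror[c][o0 * MAXPOOL_SIZE][o1 * MAXPOOL_SIZE] = outerror[c][o0][o1];
}
```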

Implementation draft note

Padding test

```C
    double input[2][4][4]={1};
    double input_pad[2][6][6]={0};
    for(int i=0;i<4;i++){
        for(int j=0;j<4;j++){
            input[0][i][j] =1;
            printf("[%d][%d] %f",i,j,input[0][i][j]);

        }
        printf("\n");
    }
    printf("\n");
    for(int i=0;i<4;i++){
        for(int j=0;j<4;j++){
            input[1][i][j] =2;
            printf("[%d][%d] %f",i,j,input[1][i][j]);
            
        }
        printf("\n");
    }
    PADDING_fill(input_pad,input);
    printf("\n");
    for(int i=0;i<6;i++){
        for(int j=0;j<6;j++){
            printf("[0][%d][%d] %f",i,j,input_pad[0][i][j]);
        }
        printf("\n");
    }
    printf("\n");
        for(int i=0;i<6;i++){
        for(int j=0;j<6;j++){
            printf("[1][%d][%d] %f",i,j,input_pad[1][i][j]);
        }
        printf("\n");
    }

    sleep(300);
```
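
The test above calls PADDING_fill, which is not listed in this post. A plausible sketch of it, assuming it just copies each channel into the centre of a zero-initialised array with a one-element border (PADDING = 1), using the fixed sizes of the test:

```C
/* Sketch of PADDING_fill as exercised by the test above: copy a [2][4][4]
   input into the centre of a [2][6][6] zeroed output. The real implementation
   presumably derives the sizes with macros instead of hard-coding them. */
static void PADDING_fill_sketch(double output[2][6][6], double input[2][4][4])
{
	for (int c = 0; c < 2; ++c)
		for (int i = 0; i < 4; ++i)
			for (int j = 0; j < 4; ++j)
				output[c][i + PADDING][j + PADDING] = input[c][i][j];
}
```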

A test of what happens when the array is accessed without its full dimensional indices:

```C
double test[2][2][2]={ 0 };
for(int i=0;i<2;i++){
    for(int j=0;j<2;j++){
        for(int k=0;k<2;k++){
            test[i][j][k] = (i)*4+(j*2)+k+1;
        }
    }
}
for(int i=0;i<8;i++){
    printf("%f\n",test[i]);
}
printf("DOT test\n");
for(int i=0;i<2;i++){
    for(int j=0;j<2;j++){
        for(int k=0;k<2;k++){
            printf("%f\n",test[i][j][k]);
        }
    }
}
```
    
The output would be (the first loop passes test[i], which is a pointer to a row rather than a double, so %f does not print the stored values, while the fully indexed loop does):

```C
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
DOT test
1.000000
2.000000
3.000000
4.000000
5.000000
6.000000
7.000000
8.000000
```

Test2:

```C
double test[2][1][1]={ 0 };
// for(int i=0;i<2;i++){
//     for(int j=0;j<2;j++){
//         for(int k=0;k<2;k++){
//             test[i][j][k] = (i)*4+(j*2)+k+1;
//         }
//     }
// }
test[0][0][0]=1;
test[1][0][0]=2;
for(int i=0;i<2;i++){
    printf("%f\n",test[i]);
}
printf("DOT test\n");
printf("The ((double *)test[0]) is %f\n",((double *)test)[0]);
printf("%f\n",test[1]);

// for(int i=0;i<2;i++){
//     for(int j=0;j<2;j++){
//         for(int k=0;k<2;k++){
//             printf("%f\n",test[i][j][k]);
//         }
//     }
// }
sleep(300);
```

The output is:
```C
    Here is TrainBatch 0
    0.000000
    0.000000
    DOT test
    The ((double *)test[0]) is 1.000000
    1.000000
    ^C
```
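
The takeaway from both tests, and the reason the training code can treat whole structs as flat parameter vectors: printing test[i] with %f passes a pointer (undefined behaviour), while casting the array itself to double * gives element-by-element access. A small self-contained version of the working pattern:

```C
#include <stdio.h>

int main(void)
{
	double test[2][2][2];
	for (int i = 0; i < 2; ++i)          /* fill 1..8 in row-major order */
		for (int j = 0; j < 2; ++j)
			for (int k = 0; k < 2; ++k)
				test[i][j][k] = i * 4 + j * 2 + k + 1;

	double *flat = (double *)test;       /* flat view over all 8 doubles */
	for (int n = 0; n < 2 * 2 * 2; ++n)
		printf("%f\n", flat[n]);         /* prints 1.000000 .. 8.000000 */
	return 0;
}
```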