Source Code¶
model 1¶
# Model 1: two convolution layers followed by one hidden dense layer.
# Inputs are a pixel block and a scalar QP; the output is a softmax over
# num_classes (the partition classes, per the surrounding text).

# Pixel input: one block_size x block_size block with NUM_CHANNELS channels.
image = Input(shape=(block_size,block_size,NUM_CHANNELS))
# sub_mean is defined elsewhere; presumably it subtracts a mean value -- confirm.
image_norm = Lambda(sub_mean)(image)
# Two 3x3 convolutions with stride 1 and 'valid' (no) padding.
conv1 = Conv2D(16, (3, 3), strides =(1,1),padding='valid', activation='relu')(image_norm)
conv2 = Conv2D(32, (3, 3), strides =(1,1), padding='valid', activation='relu')(conv1)
# 2x2 max pooling, then dropout, then flatten to a vector for the dense layers.
conv2_pooling = MaxPooling2D(pool_size=(2, 2))(conv2)
conv2_drop = Dropout(rate = 0.5)(conv2_pooling)
flat2 = Flatten()(conv2_drop)
# Second input: a single scalar per sample, divided by 255 before use.
# NOTE(review): presumably the encoder's base_qindex with range 0..255 -- confirm.
qp = Input(shape=(1,))
qp_n = Lambda(lambda x: x/255)(qp)
# The scaled QP is appended to the flattened convolution features...
flat2_qp = Concatenate(axis=1)([flat2, qp_n])
fc1 = Dense(128, activation='relu')(flat2_qp)
fc1_d = Dropout(rate = 0.5)(fc1)
# ...and appended again right before the classifier layer.
fc1_qp = Concatenate(axis=1)([fc1_d, qp_n])
output = Dense(num_classes, activation='softmax')(fc1_qp)
model = Model(inputs=[image,qp], outputs=output)
model.summary()
# NOTE(review): `lr` is the legacy keyword for the Adam step size; newer Keras
# releases expect `learning_rate` -- confirm against the installed version.
model.compile(loss='categorical_crossentropy',
optimizer=keras.optimizers.Adam(lr=0.001),
metrics=['accuracy'])
model 2¶
# Model 2: three convolution layers followed by one hidden dense layer.
# Inputs are a pixel block and a scalar QP; the output is a softmax over
# num_classes (the partition classes, per the surrounding text).

# Pixel input: one block_size x block_size block with NUM_CHANNELS channels.
data = Input(shape=(block_size,block_size,NUM_CHANNELS))
# sub_mean is defined elsewhere; presumably it subtracts a mean value -- confirm.
data_norm = Lambda(sub_mean)(data)
conv1 = Conv2D(32, (3, 3), strides =(1,1),padding='valid', activation='relu')(data_norm)
# NOTE(review): conv1_dropout is never consumed -- conv2 below reads conv1
# directly, so this dropout layer is not part of the model graph. Confirm
# whether conv2 was meant to take conv1_dropout.
conv1_dropout = Dropout(rate=0.5)(conv1)
conv2 = Conv2D(64, (3, 3), strides =(1,1), activation='relu', padding='valid')(conv1)
# NOTE(review): same situation -- conv3 reads conv2, not conv2_dropout.
conv2_dropout = Dropout(rate=0.5)(conv2)
conv3 = Conv2D(128, (3, 3), strides =(1,1), activation='relu', padding='valid')(conv2)
# This dropout IS wired in: it feeds the pooling layer.
conv3_dropout = Dropout(rate=0.5)(conv3)
conv3_pooling = MaxPooling2D(pool_size=(2, 2))(conv3_dropout)
# A second dropout (rate 0.25) is applied after pooling, then the result is flattened.
conv3_d = Dropout(0.25)(conv3_pooling)
flat3 = Flatten()(conv3_d)
# Second input: a single scalar per sample, divided by 255 before use.
# NOTE(review): presumably the encoder's base_qindex with range 0..255 -- confirm.
qp = Input(shape=(1,))
qp_n = Lambda(lambda x: x/255)(qp)
# The scaled QP is appended to the flattened convolution features...
concat = Concatenate(axis=1)([flat3, qp_n])
fc1 = Dense(128, activation='relu')(concat)
fc1_d = Dropout(rate=0.5)(fc1)
# ...and appended again right before the classifier layer.
fc1_qp = Concatenate(axis=1)([fc1_d, qp_n])
# Disabled experiment: an extra 48-unit dense layer before the classifier.
#fc2 = Dense(48, activation='relu')(fc1_qp)
#fc2_qp = Concatenate(axis=1)([fc2, qp_n])
output = Dense(num_classes, activation='softmax')(fc1_qp)
model = Model(inputs=[data,qp], outputs=output)
model.summary()
# NOTE(review): `lr` is the legacy keyword for the Adam step size; newer Keras
# releases expect `learning_rate` -- confirm against the installed version.
model.compile(loss='categorical_crossentropy',
optimizer=keras.optimizers.Adam(lr=0.001),
metrics=['accuracy'])
Encoder Modification¶
The following files are changed so that the input file name is passed to the CNN model.
aom/src/aom_codec.h¶
Add a pointer to the file name to aom_codec_ctx. This structure is the member named encoder of the stream state structure.
typedef struct aom_codec_ctx {
/* Input file name. aomenc sets it from input.filename, and encoder_init
 * copies the pointer into AV1_COMP. The string is not duplicated here. */
const char *filename;
...
} aom_codec_ctx_t;
apps/aomenc.c¶
Pass the file name to the encoder member of the stream.
FOREACH_STREAM(stream, streams) {
/* Store the input file name in the stream's codec context before the encoder
 * is initialized (presumably so that encoder_init can read it -- confirm). */
stream->encoder.filename=input.filename;
initialize_encoder(stream, &global); }
av1/encoder/encoder.h¶
Add member filename to the structure AV1_COMP. Also, add an array member that stores the partition index from CNN model.
typedef struct AV1_COMP {
/* Input file name, copied from aom_codec_ctx in encoder_init and passed to
 * the CNN script on its command line. */
const char *filename;
...
/* Partition indices predicted by the CNN, indexed [level][row][col].
 * level 0, 1 and 2 hold the 64x64, 32x32 and 16x16 blocks respectively;
 * row/col are the block's position counted in blocks of that size.
 * NOTE(review): 135 x 240 presumably covers a 3840x2160 frame at 16x16
 * granularity -- confirm; larger frames would not fit. */
int prediction[3][135][240];
...
} AV1_COMP;
av1/encoder/encoder.c¶
In the function av1_create_compressor, add the following code to zero-initialize the prediction array.
/* Zero every entry of the CNN prediction table (all three block-size levels). */
memset(cpi->prediction, 0, sizeof(cpi->prediction));
av1/av1_cx_iface.c¶
In the function encoder_init, pass the filename from aom_codec_ctx to AV1_COMP by adding the following code.
if (res == AOM_CODEC_OK) {
...
/* Forward the input file name from the codec context to the compressor
 * instance; only the pointer is copied, not the string. */
priv->cpi->filename = ctx->filename;
}
av1/encoder/encode_frame.c¶
In the function encode_frame_internal, add the following code to call the CNN model through a Python script. The code first runs the Python script that performs the partition prediction, and then reads the prediction result from cu_depth.txt into the array set up beforehand.
struct aom_usec_timer time_python;
aom_usec_timer_start(&time_python);
char cmd[100];
sprintf(cmd, "python3 video_to_cu_depth_all.py %s %d %d %d %d %d",cpi->filename,cm->width, cm->height, cm->base_qindex, 0, cm->current_frame.order_hint);
printf("%s\n", cmd);
system(cmd)==0;
aom_usec_timer_mark(&time_python);
uint64_t time_for_python = aom_usec_timer_elapsed(&time_python);
printf("time_for_python=%d\n", time_for_python);
struct aom_usec_timer time_r;
aom_usec_timer_start(&time_r);
FILE *in_file = fopen("cu_depth.txt", "r");
if (in_file == NULL)
{
printf("Error! Could not open file\n");
exit(-1); // must include stdlib.h
}
int block_width = cm->width>>6;
int block_height = cm->height>>6;
int cols = block_width*64 < cm->width ? block_width+1 : block_width;
int rows = block_height*64 < cm->height ? block_height+1 : block_height;
int blocksize =0;
for (int row = 0; row < rows; row++){
for (int col = 0; col < cols; col++){
fscanf(in_file,"%1d", &cpi->prediction[blocksize][row][col]);
}
}
block_width = cm->width>>5;
block_height = cm->height>>5;
cols = block_width*32 < cm->width ? block_width+1 : block_width;
rows = block_height*32 < cm->height ? block_height+1 : block_height;
blocksize =1;
for (int row = 0; row < rows; row++){
for (int col = 0; col < cols; col++){
fscanf(in_file,"%1d", &cpi->prediction[blocksize][row][col]);
}
}
block_width = cm->width>>4;
block_height = cm->height>>4;
cols = block_width*16 < cm->width ? block_width+1 : block_width;
rows = block_height*16 < cm->height ? block_height+1 : block_height;
blocksize =2;
for (int row = 0; row < rows; row++){
for (int col = 0; col < cols; col++){
fscanf(in_file,"%1d", &cpi->prediction[blocksize][row][col]);
}
}
aom_usec_timer_mark(&time_r);
uint64_t time_for_read = aom_usec_timer_elapsed(&time_r);
printf("time_for_read=%d\n", time_for_read);
In the function rd_pick_partition, add the following code to replace the original algorithm. Since only three block sizes (64x64, 32x32 and 16x16) are considered, the condition is restricted to those sizes, as shown in the following code.
/*
 * CNN-guided dispatch: for the three block sizes covered by the prediction
 * table, look up the predicted partition for this block and jump straight to
 * the matching search stage. A value that matches none of the listed
 * partition types falls out of the switch and continues below.
 */
if (bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16) {
  /* Table level: 0 for 64x64, 1 for 32x32, 2 for 16x16. */
  const int level = (bsize == BLOCK_64X64) ? 0 : (bsize == BLOCK_32X32) ? 1 : 2;
  /* mi_row/mi_col are shifted right by 4, 3 or 2 to get the block's
   * row/column within that level's table. */
  const int mi_shift = 4 - level;
  const int predicted =
      cpi->prediction[level][mi_row >> mi_shift][mi_col >> mi_shift];

  switch (predicted) {
    case PARTITION_NONE: goto DO_PARTITION_NONE;
    case PARTITION_HORZ: goto DO_PARTITION_HORZ;
    case PARTITION_VERT: goto DO_PARTITION_VERT;
    case PARTITION_SPLIT: goto DO_PARTITION_SPLIT;
    case PARTITION_HORZ_A: goto DO_PARTITION_HORZ_A;
    case PARTITION_HORZ_B: goto DO_PARTITION_HORZ_B;
    case PARTITION_VERT_A: goto DO_PARTITION_VERT_A;
    case PARTITION_VERT_B: goto DO_PARTITION_VERT_B;
    case PARTITION_HORZ_4: goto DO_PARTITION_HORZ_4;
    case PARTITION_VERT_4: goto DO_PARTITION_VERT_4;
    default: break;
  }
}
...
/*
 * Skeleton of the search stages in rd_pick_partition after the change.
 * Each "..." stands for existing encoder code that is not shown here.
 *
 * Every stage gets a DO_PARTITION_* label, which is the target of the CNN
 * dispatch switch. For 64x64, 32x32 and 16x16 blocks the stage's enabling
 * flags are forced on at entry, and control jumps to ENDING as soon as the
 * stage has run, so the stages that follow are skipped. For any other block
 * size both guards are false and execution passes through the labels.
 *
 * NOTE(review): the forced flags are not uniform across stages (HORZ/VERT
 * also clear prune_* and set do_rectangular_split; the A/B stages set only
 * *_allowed flags; the 4-way stages set do_rectangular_split but not
 * partition_horz_allowed/partition_vert_allowed). Confirm that each elided
 * stage needs nothing beyond the flags set here.
 */
DO_PARTITION_NONE:
if (bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16)
partition_none_allowed=1;
// PARTITION_NONE
...
/* CNN-controlled sizes evaluate this stage only. */
if (bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16)
goto ENDING;
DO_PARTITION_SPLIT:
if (bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16)
do_square_split=1;
// PARTITION_SPLIT
...
if (bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16)
goto ENDING;
DO_PARTITION_HORZ:
if (bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16)
{
partition_horz_allowed=1;
prune_horz=0;
do_rectangular_split=1;
}
//PARTITION_HORZ
...
if (bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16)
goto ENDING;
DO_PARTITION_VERT:
if (bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16)
{
partition_vert_allowed=1;
prune_vert=0;
do_rectangular_split=1;
}
// PARTITION_VERT
...
if (bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16)
goto ENDING;
...
DO_PARTITION_HORZ_A:
if (bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16)
{
partition_horz_allowed=1;
horza_partition_allowed=1;
}
//PARTITION_HORZ_A:
...
if (bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16)
goto ENDING;
DO_PARTITION_HORZ_B:
if (bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16)
{
partition_horz_allowed=1;
horzb_partition_allowed=1;
}
// PARTITION_HORZ_B
...
if (bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16)
goto ENDING;
DO_PARTITION_VERT_A:
if (bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16)
{
verta_partition_allowed=1;
partition_vert_allowed=1;
}
// PARTITION_VERT_A
...
if (bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16)
goto ENDING;
DO_PARTITION_VERT_B:
if (bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16)
{
vertb_partition_allowed=1;
partition_vert_allowed=1;
}
// PARTITION_VERT_B
...
if (bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16)
goto ENDING;
DO_PARTITION_HORZ_4:
if (bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16)
{
partition_horz4_allowed=1;
do_rectangular_split=1;
}
// PARTITION_HORZ_4
...
if (bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16)
goto ENDING;
DO_PARTITION_VERT_4:
if (bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16)
{
partition_vert4_allowed=1;
do_rectangular_split=1;
}
// PARTITION_VERT_4
...
if (bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16)
goto ENDING;
...
/* Common exit point reached after the single CNN-selected stage. */
ENDING: