This example shows how the task context buffer can be used as a large lookup table that converts 16-bit input data to 8-bit output data.
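For all $-32768 \le \mathit{in} < 32768$, the table is defined as:
$$
\mathrm{Table}(\mathit{in}) =
\begin{cases}
0, & \mathit{in} < -4096 \\
\mathit{in}/32, & -4096 \le \mathit{in} < 4096 \\
255, & \mathit{in} \ge 4096
\end{cases}
$$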
The following is a stripped-down code listing. The less interesting routines have been removed so that you can focus on the key features. Because the task context buffer (the lookup table) is already initialized by the host code and is used only as read-only data, you do not need the context setup and context merge functions on the accelerator side.
/* ------------------------------------------------ */
/* types visible to both the host and accelerator   */
/* ------------------------------------------------ */

/*
 * Task context: a byte table with 65536 entries, i.e. one entry for
 * every value a 16-bit index can take.
 */
struct _my_task_context_t {
    alf_data_byte_t table[65536];
};
typedef struct _my_task_context_t my_task_context_t;

/* Parameters attached to each work block (WB). */
struct _my_wb_parms_t {
    alf_data_uint32_t num_data; /* number of data items in this WB */
};
typedef struct _my_wb_parms_t my_wb_parms_t;
alf_task_desc_create(alf_handle, 0, &task_desc_handle;); /* set up the task descriptor ... ... */ /* the computing kernel name */ alf_task_desc_set_int64(task_desc_handle, ALF_TASK_DESC_ACCEL_KERNEL_REF_L, "comp_kernel"); /* the task context buffer size */ alf_task_desc_set_int32(task_desc_handle, ALF_TASK_DESC_TSK_CTX_SIZE, sizeof(my_task_context_t)); /* the work block parm buffer size */ alf_task_desc_set_int32(task_desc_handle, ALF_TASK_DESC_WB_PARM_CTX_BUF_SIZE, sizeof(my_wb_parms_t)); /* the input buffer size */ alf_task_desc_set_int32(task_desc_handle, ALF_TASK_DESC_WB_IN_BUF_SIZE, PART_SIZE*sizeof(alf_data_int16_t)); /* the output buffer size */ alf_task_desc_set_int32(task_desc_handle, ALF_TASK_DESC_WB_OUT_BUF_SIZE, PART_SIZE*sizeof(alf_data_byte_t)); /* the task context entry */ alf_task_desc_ctx_entry_add(task_desc_handle, ALF_DATA_BYTE, sizeof(my_task_context_t)/sizeof(alf_data_byte_t));
/*
 * Walk the data in steps of PART_SIZE elements; for each step create a
 * work block, describe its input and output transfers, attach the
 * parameter block and enqueue it.
 */
for (i = 0; i < NUM_DATA; i += PART_SIZE) {
    /* every work block carries PART_SIZE data items */
    wb_parm.num_data = PART_SIZE;

    alf_wb_create(task_handle, ALF_WB_SINGLE, 0, &wb_handle);

    /* input: PART_SIZE 16-bit samples starting at element i */
    alf_wb_dtl_begin(wb_handle, ALF_BUF_IN, 0);
    alf_wb_dtl_entry_add(wb_handle, &pcm16_in[i], PART_SIZE, ALF_DATA_INT16);
    alf_wb_dtl_end(wb_handle);

    /* output: PART_SIZE bytes starting at element i */
    alf_wb_dtl_begin(wb_handle, ALF_BUF_OUT, 0);
    alf_wb_dtl_entry_add(wb_handle, &pcm8_out[i], PART_SIZE, ALF_DATA_BYTE);
    alf_wb_dtl_end(wb_handle);

    /* wb parm: size is given as a count of 32-bit words */
    alf_wb_parm_add(wb_handle,
                    (void *)&wb_parm,
                    sizeof(wb_parm) / sizeof(unsigned int),
                    ALF_DATA_INT32,
                    0);

    alf_wb_enqueue(wb_handle);
}
/* ---------------------------------------------- */ /* the accelerator side code */ /* ---------------------------------------------- */ /* the computation kernel function */ int comp_kernel(void *p_task_context, void *p_parm_ctx_buffer, void *p_input_buffer, void *p_output_buffer, void *p_inout_buffer, unsigned int current_count, unsigned int total_count) { my_task_context_t *p_ctx = (my_task_context_t *) p_task_context; my_wb_parms_t *p_parm = (my_wb_parms_t *) p_parm_ctx_buffer; alf_data_int16_t *in = (alf_data_int16_t *)p_input_buffer; alf_data_byte_t *out = (alf_data_byte_t *)p_output_buffer; unsigned int size = p_parm->num_data; unsigned int i; // it is just a simple table lookup for(i=0;i<size;i++) { out[i] = p_ctx->table[(unsigned short)in[i]]; } return 0; }