/* ***************************************************************************** * * FILE NAME : time.c * PROGRAMMER(S) : Robert Arnold, Chris Healy * DATE CREATED : July 3, 1993 * ****************************************************************************** */ #include "instdef.h" #include "stats.h" #include "time.h" #include "tree.h" #ifndef INST_H #include "inst.h" #endif #ifdef __SIB_DEBUG__ // #include "tracer.h" bool sib_verbose = true ; #endif // __SIB_DEBUG__ /* Added by Sibin M on Oct 25, 2004. * Process of cleaning up the TA for C++ */ #include "timedecl.h" #include "input.h" #include "depend.h" #include "path.h" #include "treedecl.h" #include "lib.h" #include "instdecl.h" #include "isa.h" #include "timeds.h" // #include /* Added by Sibin M on Oct 25, 2004. * Process of cleaning up the TA for C++ */ /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ #ifdef __PARAMETRIC_TA__ #include "polynomial.h" extern string poly_string ; extern string variables_string ; #endif // __PARAMETRIC_TA__ /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ #include #include // #include #include #include #include // #include // #ifdef DISPLAY #define printc printf #else #define printc #endif // #define __SIB_DEBUG__ #ifdef __SIB_DEBUG__ // bool sib_verbose = false ; #endif // __SIB_DEBUG__ // extern void Process_INF_File(); extern struct inst_block * Determine_Inst_Set(); /*Adding PARAMETRIZE and DO_DATA_CACHE for FAST */ bool pipeline_only = false, traditional = false, verbose = false, wrap_around = false, compute_pd = false, naive_too = false, wc_only = false, bc_only = false, overlap = true, only_fetch = false, ignore_value = false, cache_only = false, PARAMETERIZE = false, DO_DATA_CACHE = false; int path_beg_cycles[NUM_STAGES], path_end_cycles[NUM_STAGES], path_cycles, num_paths, inner_ff_trans, inner_extra_cycles, all_cache_hits, *delay_list, *delay_list_temp, linesize, numsets, setsize, words_per_line, *avail_time, max_intraline_delay, mhz, err_level, max_jumping_delay, iterations_handled, max_iterations, memory_count = 0, max_memory_count = 0, invocation_num = 0, constant_penalty, call_site, dveclen, int2char, intscale, bitsperint, miss_delay; /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ #ifdef __PARAMETRIC_TA__ int difference_steady_first = 0 ; struct polynomial *path_polynomial = NULL; #endif // __PARAMETRIC_TA__ /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ /*Adding new Variables for the FAST work*/ int DC_MISS_LATENCY = 0; struct instruct *beg_occupant[NUM_STAGES], *end_occupant[NUM_STAGES]; /* global union path information allows us to detect data hazards just after an inner loop: (3/6) */ /* Modified by Sibin M on Oct 25, 2004. * Process of cleaning up TA for C++ * * Changed the definition of all functions to meet current coding standards. */ /* Modified by Sibin M on Sept. 13, 2004. * Removed the earlier time_main() and replaced it with a main() */ //time_main(argc, argv) int main( int argc, char* argv[] ) { int i,j, start_time, stop_time; string inputfile, istfile; struct inf_func_element *inf = (struct inf_func_element *) NULL; struct inf_func_element *func_ptr; struct inst_block *inst_set; struct tnode *main_func; struct tnode *timing_tree; struct loop_node *temp_ln; struct loop_path_node *temp_lpn; struct loop_block_list *temp_lbl; struct tnode *temp_node; struct loop_block_list *temp_list; /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ #ifdef __PARAMETRIC_TA__ FILE* param_fp ; #endif // __PARAMETRIC_TA__ /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ // start_time = ttime(); /* err_level = malloc_debug(1); */ for (i=0;iloop and *inf->block linked lists */ Process_INF_File(&inf, inputfile, inst_set); printf("Done with the inf file \n"); if (! ignore_value) { /*The following functions work with constraints in the CFG. The work is primarily from Dr. Healy's dissertation */ host_info(); set_vector_length(inf); block_constraints(inf); printf("Done with the block_constraints\n"); } } } #ifdef DISPLAY compute_pd = true; #endif /*BOOKMARK- If PARAMETERIZE is true we need to calculate only the ideal number of cycles, so we modify cache miss latencies (data and instr)*/ if (PARAMETERIZE) { for (i=0; i next) { /*This function creates paths and loop nodes for each function in inf*/ Create_Timing_Paths (&func_ptr); /* following is used for debugging*/ if (verbose) { printf("ALL PATHS IN %s\n",func_ptr->func); for (temp_ln = func_ptr->loop_nodes; temp_ln; temp_ln= temp_ln->next) { for (temp_lpn = temp_ln->path; temp_lpn; temp_lpn= temp_lpn->next) { for (temp_lbl = temp_lpn->path_list; temp_lbl; temp_lbl = temp_lbl->next) { if (temp_lbl->is_loop_node) printf(" %dLOOP ", temp_lbl->block_number); else printf(" %d ",temp_lbl->block_number); } printf("\n"); } } } } printf("Returning from Create_Timing_Paths\n"); Build_Timing_Tree (inf, istfile, &timing_tree, &main_func); printf("Returning from Build_Timing_Tree\n"); adjust_instcats (timing_tree); adjust_firstmisses (timing_tree); /* adds 9 cycles to sort (5/25) */ adj_more_fm (timing_tree); printf("Done with adjusting misses \n"); /* following is used for debugging*/ if (verbose) { printf("A peek at the tree structure ----\n"); Peek_At_Tree(timing_tree); } Batch_Time(timing_tree); printf("Returning from Batch_Time\n"); if (PARAMETERIZE) { Print_Parameters(main_func); } else { Print_Results(main_func); } /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ #ifdef __PARAMETRIC_TA__ if (main_func->need_polynomial) { printf("Final polynomial answer: "); if (difference_steady_first != 0) { struct term *new_const_term = (struct term*)malloc(sizeof(struct term)); memory_count+=sizeof(struct term); memset(new_const_term, 0, sizeof(struct term)); new_const_term->previous = NULL; new_const_term->next = NULL; new_const_term->coefficient = difference_steady_first; new_const_term->power = 1; add_term_structure( main_func->polynomial_worst_case, new_const_term); } pretty_print_polynomial(main_func->polynomial_worst_case); printf("\n"); pretty_print_list(main_func->polynomial_worst_case); /* also create _eval.c file, but only if parametric 3/23/05 */ createEvals(argc, argv, main_func, inf); } /* indicate to the invocation environment whether parametric 3/24/05 */ param_fp = fopen("param.txt", "w"); fprintf(param_fp, "%s\n", (main_func->need_polynomial) ? "1" : "0"); fclose(param_fp); #endif // __PARAMETRIC_TA__ /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ printf("PROGRAM DONE...\n"); #ifdef INTERFACE user_interf(argc, argv, inf, timing_tree); #endif printc ("Dynamically allocated memory = %d bytes\n", max_memory_count); // stop_time = ttime(); // printf ("response time = %5.3f\n", (stop_time - start_time)/1000.0); return 0; } /* * This function returns in milliseconds the amount of compiler time used * prior to it being called. "cutime" counts child times, although * I don't think that happens in the timing analyzer. I'm just being * consistent with the compiler. */ // int ttime() // { // struct tms buffer; // int utime; // times(&buffer); // utime = int( ((buffer.tms_utime /* + buffer.tms_cutime*/) / 60.0) * 1000.0 ) ; // return (utime); // } /* * Dr. Whalley wrote this function 4/30/1995 to correct the .ist file * in certain cases. * * adjust_instcats - adjusts instruction categorizations that have * been misclassified */ void adjust_instcats( struct tnode* root ) { struct tnode *node_ptr, *pnode_ptr; struct inst_cat *inst_ptr; struct category_list *cat_ptr; bool loop_encountered; /* for each node in the tree */ for (node_ptr = root; node_ptr; node_ptr = node_ptr->next) /* for each instruction within the function or loop */ for (inst_ptr = node_ptr->instruct_list; inst_ptr; inst_ptr = inst_ptr->next) { /* for each category level for the instruction */ loop_encountered = false; pnode_ptr = node_ptr; for (cat_ptr = inst_ptr->cat_list; cat_ptr; cat_ptr = cat_ptr->next) { /* if have a first hit, then adjust it and higher level worst-case categorizations for the instructions when a loop has already been encountered */ if (cat_ptr->worst_case == 'i') { if (loop_encountered) { fprintf(stderr, "misclassification in %s instance %d at instruction %d\n", node_ptr->name, node_ptr->instance, inst_ptr->inst_number); for (; cat_ptr; cat_ptr = cat_ptr->next) cat_ptr->worst_case = 'm'; } break; } /* check if a loop has been encountered I added 2 prelim checks to avoid segv (10/1) */ if (pnode_ptr && pnode_ptr->loop_paths && pnode_ptr->loop_paths->max_iterations > 1) loop_encountered = true; /* I added a check here too (10/1) */ if (pnode_ptr) pnode_ptr = pnode_ptr->parent; else break; } } } /* Dr. Whalley wrote this function to change some f's to m's. (5/25) * * adjust_firstmisses - adjusts first miss instruction categorizations * to misses when they will never be first misses * in loops */ void adjust_firstmisses( struct tnode* root ) { struct tnode *node_ptr, *pnode_ptr; struct inst_cat *inst_ptr; struct category_list *cat_ptr; bool loop_encountered; /* for each node in the tree */ for (node_ptr = root; node_ptr; node_ptr = node_ptr->next) /* for each instruction within the function or loop */ for (inst_ptr = node_ptr->instruct_list; inst_ptr; inst_ptr = inst_ptr->next) { /* if the lowest level category of the instruction is a first miss */ if (inst_ptr->cat_list->worst_case == 'f') { /* for each category level for the instruction */ loop_encountered = false; pnode_ptr = node_ptr; for (cat_ptr = inst_ptr->cat_list; cat_ptr; cat_ptr = cat_ptr->next) { /* check if the instruction has become a miss */ if (cat_ptr->worst_case == 'm' || cat_ptr->worst_case == 'i') break; /* check if a loop has been encountered */ if (pnode_ptr->loop_paths->max_iterations > 1) { loop_encountered = true; break; } pnode_ptr = pnode_ptr->parent; } if (!loop_encountered) { fprintf(stderr, "reclassification of first miss " "in %s instance %d at instruction %d\n", node_ptr->name, node_ptr->instance, inst_ptr->inst_number); for (cat_ptr = inst_ptr->cat_list; cat_ptr; cat_ptr = cat_ptr->next) { if (cat_ptr->worst_case == 'f') cat_ptr->worst_case = 'm'; else break; } for (cat_ptr = inst_ptr->cat_list; cat_ptr; cat_ptr = cat_ptr->next) { if (cat_ptr->best_case == 'f') cat_ptr->best_case = 'm'; else break; } } } } } /* * adj_more_fm : Re-classify those first misses that occur in loops and are * categorized as f in a higher level that is not a loop. Keep * calling it a f in the loop levels, but change the non-loop f * to m. Often a f->f trans occurs in loop->function, and it * really is not a f in outer level because a fm in a function is * more accurately identified as an always miss. 2/15/96 */ void adj_more_fm( struct tnode* root ) { struct tnode *node_ptr, *pnode_ptr; struct inst_cat *inst_ptr; struct category_list *cat_ptr; /* for each node in the tree */ for (node_ptr = root; node_ptr; node_ptr = node_ptr->next) { /* for each instruction within the function or loop */ for (inst_ptr = node_ptr->instruct_list; inst_ptr; inst_ptr = inst_ptr->next) { /* if the lowest level category of the instruction is a first miss */ if (inst_ptr->cat_list->worst_case == 'f' && inst_ptr->cat_list->next && node_ptr->loop_paths->max_iterations > 1) { /* look at higher loop levels */ for (cat_ptr = inst_ptr->cat_list->next, pnode_ptr = node_ptr->parent; cat_ptr; cat_ptr = cat_ptr->next, pnode_ptr = pnode_ptr->parent) { if (cat_ptr->worst_case != 'f') break; if (pnode_ptr->loop_paths->max_iterations == 1) { fprintf (stderr, "need to change f to m in %s for inst %d\n", node_ptr->name, inst_ptr->inst_number); cat_ptr->worst_case = 'm'; } } } } } } /***************************************************************************** * * FUNCTION NAME: Batch_Time * *******************************************************************************/ void Batch_Time( struct tnode* root ) { bool change; struct tnode * node_ptr, *temp_node; do { change = false; /*AMOL: For all functions do the worst case and best case analysis and do this again if there is change*/ for (node_ptr = root; node_ptr; node_ptr = node_ptr->next) { if (! node_ptr->timed && All_Children_Timed(node_ptr)) { /* * TIME THIS NODE * * For every node, the node_ptr->inf_ptr points to * the inf func element that contains the information * for that node, whether the node is a function node * or a loop node */ if (verbose) { printc ("\n\nnode is %s", node_ptr->name); if (node_ptr->parent) printc (" parent is %s", node_ptr->parent->name); printc ("\n"); } /*FIXIT- PERMANTLY SETTING WC_ONLY INTO TO true */ wc_only = true; if (! bc_only) Time_Worst_Case (node_ptr->inf_ptr, node_ptr); if (! wc_only) Time_Best_Case (node_ptr->inf_ptr, node_ptr); node_ptr->timed = true; change = true; } } } while (change); } /***************************************** * * FUNCTION NAME: All_Children_Timed * *****************************************/ bool All_Children_Timed( struct tnode* node_ptr ) { struct tnode *child_ptr; if (node_ptr->child) { for (child_ptr = node_ptr->child; child_ptr; child_ptr = child_ptr->sibling) { if (! child_ptr->timed) return false; } } return true; } /***************************************************************************** * * FUNCTION NAME: Time_Worst_Case * * ASSUMPTIONS: * * This function is the latest version where each node has a max time, * base time and an adjust value. * * 1. It is assumed that the node being timed will already have the * list of exit blocks created. * * GENERAL COMMENTS: * this function is going to go through the list of exit blocks * for the node passed in and determine: * worst case time: max # cycles for the max_number of iterations * base time: max # cycles where each first miss = hit * adjust value: total sum of all the first miss penalities * * * The algorithm is going to choose the max "new" path for each iteration * until it is time to exit the node. Then it will select the max exit path * that exits out of the specified exit block. Therefore, the times for * each of the desired values is going to be the same until the last * iteration is reached and the unique exit path is chosen. * * *NOTE- THIS FUNCTION WILL BE MODIFIED, ALONG WITH TIME_PATH TO ACCOMODATE * PARAMETERIZING OF THE TIMING ANALYZER WITH RESPECT TO FREQUENCY. * Basically code will be added so that when comparing 2 paths, the * equations will be considered instead of cycles given by the time_path function. * *******************************************************************************/ void Time_Worst_Case( struct inf_func_element* inf, struct tnode* node_ptr ) { int i, delay, stage, base_time, total_time, diff, fm_encountered, longest_path_time, total_extra_time, total_extra_fh_time, total_extra_fm_time, union_release_time[NUMREGS], union_src_needed[NUMREGS], cum_extra_time, longest_fmiss_time, union_src_stage[NUMREGS], total_pc_nai_time, longest_pc_nai_path_time, longest_pipe_nai_path_time, total_pipe_nai_time, max_pipeline, total_first_line_delay, total_last_line_delay, total_delay_after_fm, prev_iter_done[NUM_STAGES], new_iter_start[NUM_STAGES], sums[NUM_STAGES], new_iter_done[NUM_STAGES], smallest_sum, total_fm_dead_cycles, diffs[NUM_STAGES], reduce, fh_encountered, longest_pipeline_time, abs_max_iterations, abs_total_time = 0, iterations_remaining, iterations_required, iterations_to_do, range_prev_iter_done, max_new_iter_done, total_shared_fm_time, avg_total_time, incr, min_value; int base_latency = 0, max_latency = 0, temp_latency = 0, longest_fmiss_latency = 0, cum_extra_latency = 0, longest_pipeline_latency = 0, longest_path_latency = 0, total_latency = 0, avg_total_latency = 0, abs_total_latency = 0, data_status = false, total_overlap = 0, temp_extra_time =0, temp_diff = 0, total_icache =0, total_dcache =0, path_cond = false; /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ #ifdef __PARAMETRIC_TA__ int incr_path_cycles = 0 ; int worst_path_cycles = 0 ; int need_polynomial = 0 ; struct polynomial *tp_polynomial_list = NULL; struct loop_element *found_loop_element = NULL; #endif // __PARAMETRIC_TA__ /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ /* Added by Sibin M on MApril 27, 2004. * This contains return value of adjust_for_variable_cycles() */ int variable_cycles = 0 ; struct delay_after_fm_node *delay_after_fm_temp; int take_branch = false; long visa_worst_case = 0; struct loop_path_node *longest_path_ptr, *longest_fmiss_path_ptr; struct union_list_node *beg_head[NUM_STAGES], *end_head[NUM_STAGES], *beg_temp[NUM_STAGES], *end_temp[NUM_STAGES], *loop_beg_head[NUM_STAGES], *loop_end_head[NUM_STAGES], *prev_end_head[NUM_STAGES]; struct cache_block *loop_fm_list, *loop_fh_list, *cum_fm_list, *longest_fmiss_fm_list, *cblock; struct cache_block *tmp_ptr = NULL; /* used when a first miss list is not needed */ struct cache_block *tmp_fh_ptr = NULL; /* tmp list of first hits */ struct exit_block *exit_block_ptr; /* ptr to exit block in list exit blocks for ea node */ struct loop_path_node *path_ptr; struct range_node *range_ptr; struct loop_element *loop_ele_ptr; /*This function is given a node and it finds WC path among all the available pathe */ /*INITIALIZE VARIABLES */ /*First all vairables are initialized.*/ if (naive_too) { total_pc_nai_time = 0; total_pipe_nai_time = 0; longest_pipe_nai_path_time = 0; longest_pc_nai_path_time = 0; node_ptr->wc_pipe_nai_time = 0; node_ptr->wc_pc_nai_time = 0; } node_ptr->wc_pipeline_time = 0; node_ptr->extra_time = 0; max_iterations = node_ptr->loop_paths->max_iterations; abs_max_iterations = node_ptr->loop_paths->abs_max_iterations; abs_max_iterations = max_iterations; /* 4/18/99 fix */ num_paths = set_path_numbers(node_ptr); if (verbose) { printc ("number of paths: %d\n", num_paths); printc ("number of iterations: %d\n", max_iterations); printc ("absolute max # iterations: %d\n", abs_max_iterations); print_block_info(inf, node_ptr); } if (! ignore_value) { init_branch_info(inf, node_ptr); infeasible_paths(inf, node_ptr); path_influence(inf, node_ptr); create_imm_follow(node_ptr); if (verbose) print_imm_follow(node_ptr); reachability(node_ptr); if (verbose) print_path_reach(node_ptr); find_compares(inf, node_ptr, WORST); } cum_fm_list = NULL; total_time = 0; avg_total_time = 0; base_time = 0; cum_extra_time = 0; total_extra_fh_time = 0; total_extra_fm_time = 0; total_shared_fm_time = 0; total_first_line_delay = 0; total_delay_after_fm = 0; total_last_line_delay = 0; total_fm_dead_cycles = 0; iterations_handled = 0; longest_path_time = 0; loop_fm_list = NULL; loop_fh_list = NULL; node_ptr -> extra_fh_time = 0; node_ptr -> extra_fm_time = 0; node_ptr -> first_line_delay = 0; node_ptr -> delay_after_fm = 0; node_ptr -> last_line_delay = 0; node_ptr -> fm_dead_cycles = 0; /* * For all continue paths, initialize the number of required and * nonrequired iterations based on the min & max iterations that * were determined by "value-based" constraints. Nov. 21, 1998 * Also, compute iterations_required as the sum of all the required * iterations of all the continue paths in this loop. */ iterations_required = 0; for (path_ptr = node_ptr->loop_paths->path; path_ptr; path_ptr = path_ptr->next) { if (path_ptr->path_type_mask == NONE) continue; path_ptr->required_iters_left = path_ptr->min_iter; path_ptr->nonrequired_iters_left = path_ptr->max_iter - path_ptr->min_iter; iterations_required += path_ptr->min_iter; } /* * If we are not performing value dependent constraint analysis, * set the above #iterations to default values. 4/20/99 */ if (ignore_value) { iterations_required = 0; for (path_ptr = node_ptr->loop_paths->path; path_ptr; path_ptr = path_ptr->next) { path_ptr->required_iters_left = 0; if (path_ptr->path_type_mask == (BACKEDGE | EXIT)) path_ptr->nonrequired_iters_left = abs_max_iterations; else if (path_ptr->path_type_mask & BACKEDGE) path_ptr->nonrequired_iters_left = abs_max_iterations - 1; else if (path_ptr->path_type_mask & EXIT) path_ptr->nonrequired_iters_left = 1; } } for (i = 0; i < NUMREGS; ++i) /* init union's register info */ { union_release_time[i] = NEVER; union_src_needed[i] = NEVER; union_src_stage[i] = NO_STAGE; } for (stage = 0; stage < NUM_STAGES; ++stage) { beg_head[stage] = NULL; end_head[stage] = NULL; /* these next 2 lines initialize the loop's (union) pipeline info */ loop_beg_head[stage] = NULL; loop_end_head[stage] = NULL; } /*READ COMMENT BELOW... WE CALCULATE 2 THINGS.. THE BASE TIME AND THE MAX_PIPELINE TIME... THE MAX_PIPELINE IS USED TO CREATE THE UNION*/ /* find the longest pipeline time for the paths we will need below to create the union. This is not the same as the base_time, which assumes fm=hit. (9/24) */ max_pipeline = 0; max_latency = 0; /*READ COMMENT BELOW*/ /*Below, Time_Path is called for the first time. All paths of the node are timed by the time_path function and the maximum time among all the paths is chosen and stored in max_pipeline */ /*NOW IF USING THE FREQUENCY MODEL, WE HAVE TO USE A DIFFRENT FORM OF MAX_PIPELINE SO THAT THE ENTIRE EQUATION IS CONSIDERED.*/ for (path_ptr = node_ptr -> loop_paths -> path; path_ptr; path_ptr = path_ptr -> next) { if (path_ptr->path_type_mask == NONE) continue; call_site = 'a'; /* * Before calling Time_Path, we initialize information about how * this path is being timed, so that later we might avoid unnecessary * re-evaluation. 1/29/99 */ path_ptr->prev_fm_status = MISS; path_ptr->prev_fh_status = MISS; path_ptr->prev_fm_list = tmp_ptr; path_ptr->prev_fh_list = tmp_fh_ptr; if (iterations_handled == 0 && DO_DATA_CACHE) data_status = true; else data_status = false; /*THE TMP_PTR AND TMP_FH_PTR ARE COLLECTING INFO ABOUT FM AND FH AND PASSING THIS TO SUBSEQUENT PATHS.*/ Time_Path (inf, node_ptr, path_ptr, &tmp_ptr, &tmp_fh_ptr, WORST, MISS, MISS, 0, 0, NO_INST, NO_INST, data_status); /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ #ifdef __PARAMETRIC_TA__ for (i = 0; i < NUM_STAGES; ++i) if (path_ptr->end_cycles[i] == 0) prev_iter_done[i] = 0; else prev_iter_done[i] = path_cycles - path_ptr->end_cycles[i] + 1; incr_path_cycles = incr_new_total_time(path_cycles, prev_iter_done, path_ptr->beg_cycles, prev_iter_done); if ((path_cycles > worst_path_cycles) && (path_polynomial == NULL)) { worst_path_cycles = incr_path_cycles; if (difference_steady_first < (path_cycles-incr_path_cycles)) difference_steady_first = path_cycles - incr_path_cycles; } else if (path_polynomial != NULL) { need_polynomial = 1; if (tp_polynomial_list) { printf("tp_polynomial_list was NOT NULL!\n"); printf("Before appending polynomial to list.\n"); pretty_print_list(tp_polynomial_list); printf("\n"); prune_polynomial_list(&tp_polynomial_list, path_polynomial); add_polynomial(tp_polynomial_list, path_polynomial); printf("After appending polynomial to list.\n"); pretty_print_list(tp_polynomial_list); printf("\n"); } else { /* make this the first polynomial in the list. */ printf("tp_polynomial_list was NULL!\n"); tp_polynomial_list = path_polynomial; if (tp_polynomial_list->next == NULL) printf("CORRECT\n"); else printf("INCORRECT\n"); } printf("PATH_POLYNOMIAL:\n"); pretty_print_polynomial(path_polynomial); printf("\n"); path_polynomial = NULL; } #endif // __PARAMETRIC_TA__ /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ if (verbose) { printf("JUST FINISHED A PATH WITH FOLLOWING INFO-\n ID = %d\n CYCLES = %d \n NAIVE = %d", path_ptr->number, path_ptr->path_cycles, path_ptr->wc_pipe_nai_time); printf("call site %c %d",call_site,path_ptr->number); } /*TAKING CARE OF THE OVERLAP */ if (PARAMETERIZE) { poss_update_equations (&max_pipeline, &max_latency,path_cycles, path_ptr->total_latency); } else poss_update (&max_pipeline, path_cycles, "update max_pipeline"); } /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ #ifdef __PARAMETRIC_TA__ /* find the loop_element for this tnode */ if ((node_ptr->node_type == LNODE) ) { found_loop_element = Find_Loop(inf, node_ptr->loop_header_block); if (found_loop_element && (found_loop_element->need_polynomial == 1)) { need_polynomial = 1; printf("found_loop_element->loop_variable : %s\n", found_loop_element->loop_variable); } } if (need_polynomial == 1) { node_ptr->need_polynomial = 1; if (tp_polynomial_list) { while (tp_polynomial_list) { struct polynomial *temp_polynomial = NULL; struct term *new_term = (struct term*)malloc(sizeof(struct term)); memset(new_term, 0, sizeof(struct term)); memory_count += sizeof(struct term); new_term->power = 1; if (found_loop_element) new_term->variable = found_loop_element->loop_variable; new_term->next = NULL; new_term->previous = NULL; new_term->coefficient_ptr = tp_polynomial_list; temp_polynomial = (struct polynomial*)new_polynomial(); memory_count += sizeof(struct polynomial); add_term_structure(temp_polynomial, new_term); printf("In middle of while loop.\n"); printf("tp_polynomial: "); pretty_print_list(tp_polynomial_list); printf("\n"); if (node_ptr->polynomial_worst_case == NULL) { printf("The node_ptr polynomial was null.\n"); node_ptr->polynomial_worst_case = temp_polynomial; } else { printf("The node_ptr polynomial was not null.\n"); add_polynomial(node_ptr->polynomial_worst_case, temp_polynomial); } printf("temp_polynomial: "); pretty_print_list(temp_polynomial); printf("\n"); printf("\nADDED USING PATH_POLYNOMIAL!\n"); printf("node_ptr polynomial: "); pretty_print_list(node_ptr->polynomial_worst_case); printf("\n"); tp_polynomial_list = tp_polynomial_list->next; } } else { struct term *new_term = (struct term*)malloc(sizeof(struct term)); memset(new_term, 0, sizeof(struct term)); memory_count += sizeof(struct term); new_term->power = 1; if (found_loop_element) new_term->variable = found_loop_element->loop_variable; new_term->next = NULL; new_term->previous = NULL; new_term->coefficient = worst_path_cycles; node_ptr->polynomial_worst_case = (struct polynomial*)new_polynomial(); memory_count += sizeof(struct polynomial); add_term_structure(node_ptr->polynomial_worst_case, new_term); node_ptr->need_polynomial = 1; pretty_print_polynomial(node_ptr->polynomial_worst_case); printf("\nADDED USING A SCALAR PATH_CYCLES!\n"); } } #endif // __PARAMETRIC_TA__ /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ /*READ COMMENT BELOW*/ /* make sure that the tmp_fh_ptr is set to all first hits */ /* reinstalled 8/25 so that fi doesn't loop forever */ /*we call time_path again and tell it to consider all fm as HITs. The path_cycles for all the paths are now changed. */ /*ACTUALLY BECAUSE OF THE BREAK, TIME_PATH IS CALLED ONLY FOR THE FIRST CONTINUE PATH */ for (path_ptr = node_ptr -> loop_paths -> path; path_ptr; path_ptr = path_ptr -> next) { if (path_ptr->path_type_mask == NONE) continue; call_site = 'b'; path_ptr->prev_fm_status = HIT; path_ptr->prev_fh_status = MISS; path_ptr->prev_fm_list = tmp_ptr; path_ptr->prev_fh_list = tmp_fh_ptr; if (iterations_handled == 0 && DO_DATA_CACHE) data_status = true; else data_status = false; Time_Path (inf, node_ptr, path_ptr, &tmp_ptr, &tmp_fh_ptr, WORST, HIT, MISS, 0, 0, NO_INST, NO_INST, data_status); if (verbose) { printf("JUST FINISHED A PATH WITH FOLLOWING INFO-\n ID = %d\n CYCLES = %d \n NAIVE = %d ", path_ptr->number, path_ptr->path_cycles, path_ptr->wc_pipe_nai_time); printf("call site %c %d",call_site,path_ptr->number); } /*NOTE- THE PATH_CYCLES CALCUALTED ABOVE ARE SENT BELOW, AND NEW VALUSE CALCULATED IF NEEDED. WILL ALSO SEND THE LATENCY INFO */ temp_latency = path_ptr->total_latency; temp_extra_time = path_ptr->extra_time; break; } /*THE FM TMP_PTR IS RESET */ tmp_ptr = NULL; /*READ COMMENT BELOW*/ /*calling time_path again... if the caching info changed from last time base_time is calculated this time*/ for (path_ptr = node_ptr -> loop_paths -> path; path_ptr; path_ptr = path_ptr -> next) { if (path_ptr->path_type_mask == NONE) continue; path_ptr->already_timed_fmiss = false; /*ONLY CALCULATE IF FM , FH LIST STORED WITH THE PATH CHANGED WRT CURRENT LIST */ if (need2calculate (path_ptr, tmp_ptr, tmp_fh_ptr, HIT, MISS)) { call_site = 'c'; path_ptr->prev_fm_status = HIT; path_ptr->prev_fh_status = MISS; path_ptr->prev_fm_list = tmp_ptr; path_ptr->prev_fh_list = tmp_fh_ptr; if (iterations_handled == 0 && DO_DATA_CACHE) data_status = true; else data_status = false; Time_Path (inf, node_ptr, path_ptr, &tmp_ptr, &tmp_fh_ptr, WORST, HIT, MISS, 0, 0, NO_INST, NO_INST, data_status); if (verbose) { printf("JUST FINISHED A PATH WITH FOLLOWING INFO-\n ID = %d\n CYCLES = %d \n NAIVE = %d ", path_ptr->number, path_ptr->path_cycles, path_ptr->wc_pipe_nai_time); printf("call site %c %d",call_site,path_ptr->number); } temp_latency = path_ptr->total_latency; temp_extra_time = path_ptr->extra_time; } else { path_cycles = path_ptr->path_cycles; temp_latency = path_ptr->total_latency; temp_extra_time = path_ptr->extra_time; } if (PARAMETERIZE) { poss_update_equations (&base_time, &base_latency,path_cycles, temp_latency); } else poss_update (&base_time, path_cycles, "update base time"); } if (verbose) printc ("Before going thru paths again, base time is %d\n", base_time); /*in an infinite loop */ /*****************************************************************************/ /*****************************************************************************/ /*****************************************************************************/ /*****************************************************************************/ while (1) { longest_fmiss_path_ptr = NULL; longest_fmiss_fm_list = NULL; longest_fmiss_time = 0; for (path_ptr = node_ptr -> loop_paths -> path; path_ptr; path_ptr = path_ptr -> next) { if (path_ptr->path_type_mask == NONE) continue; Copy_Cache_List (&tmp_ptr, cum_fm_list); call_site = 'd'; /* * thanks to "loop.c" I have to turn this off and call Time_Path * every time. 2/6/99 */ /*calling time_path for the 4th time... fm as HIT path_cycles for each path will be changed again */ if (1 || need2calculate (path_ptr, tmp_ptr, tmp_fh_ptr, HIT, MISS)) { path_ptr->prev_fm_status = HIT; path_ptr->prev_fh_status = MISS; path_ptr->prev_fm_list = tmp_ptr; path_ptr->prev_fh_list = tmp_fh_ptr; if (iterations_handled == 0 && DO_DATA_CACHE) data_status = true; else data_status = false; Time_Path (inf, node_ptr, path_ptr, &tmp_ptr, &tmp_fh_ptr, WORST, HIT, MISS, 0, 0, NO_INST, NO_INST, data_status); if (verbose) { printf("JUST FINISHED A PATH WITH FOLLOWING INFO-\n ID = %d\n CYCLES = %d \n NAIVE = %d ", path_ptr->number, path_ptr->path_cycles, path_ptr->wc_pipe_nai_time); printf("call site %c %d",call_site,path_ptr->number); } temp_latency = path_ptr->total_latency; temp_extra_time = path_ptr->extra_time; } else { path_cycles = path_ptr->path_cycles; temp_latency = path_ptr->total_latency; temp_extra_time = path_ptr->extra_time; } /*MARKED - NEED TO ADD VARIABLES TO COMPARE 2 EQUATIONS BELOW */ take_branch = false; if (PARAMETERIZE) { take_branch = compare_equations(path_cycles, temp_latency + temp_extra_time, longest_fmiss_time, longest_fmiss_latency,&path_cond); } else if (path_cycles + path_ptr->extra_time > longest_fmiss_time) take_branch = true; if (take_branch) { /* * we should not count the shared fm time in the extra time here * because it is on the side and not part of the base time that * we are comparing to shortly below 2/14/99 */ /*ADDING VARIABLES FOR FREQUENCY MODEL */ if (PARAMETERIZE) { if (!path_cond) { longest_fmiss_latency = path_ptr->total_latency + path_ptr->extra_time - path_ptr->shared_fm_time; longest_fmiss_time = path_cycles ; }else eq_merge(&longest_fmiss_latency,&longest_fmiss_time, path_ptr->total_latency + path_ptr->extra_time, path_cycles); } else { longest_fmiss_latency = path_ptr->total_latency + path_ptr->extra_time/miss_delay - path_ptr->shared_fm_time/miss_delay; longest_fmiss_time = path_cycles + path_ptr->extra_time - path_ptr->shared_fm_time; } longest_fmiss_path_ptr = path_ptr; Copy_Cache_List (&longest_fmiss_fm_list, tmp_ptr); if (verbose) { printc ("longest_fmiss_time set to %d\n", longest_fmiss_time); printc ("longest first miss list...\n"); Print_Cache_List (longest_fmiss_fm_list); } } } /*MARKED - NEED TO ADD VARIABLES TO COMPARE 2 EQUATIONS BELOW */ take_branch = false; if (PARAMETERIZE) { if (base_time == longest_fmiss_time && base_latency == longest_fmiss_latency) take_branch = true; else { take_branch = compare_equations(base_time, base_latency, longest_fmiss_time, longest_fmiss_latency,&path_cond); } } else if (longest_fmiss_time <= base_time) take_branch = true; if (longest_fmiss_path_ptr == NULL || take_branch) break; cum_extra_time += longest_fmiss_time - base_time; if (PARAMETERIZE) cum_extra_latency += longest_fmiss_latency - base_latency; Union_Cache_List (&cum_fm_list, longest_fmiss_fm_list); longest_fmiss_path_ptr->already_timed_fmiss = true; if (verbose) printc ("cumulative extra time is now %d\n", cum_extra_time); /* * 2/19/01: If the longest fm list is empty, then I think * we should exit the loop. (sumcntmatrix1) */ if (longest_fmiss_fm_list == NULL) break; } /*****************************************************************************/ /*****************************************************************************/ /*****************************************************************************/ /* * While-loop to enclose first two phases. 11/22/98 */ while (iterations_handled < abs_max_iterations) { /* * First phase of algorithm is a do-while to find steady state. */ do { /* * Re-calculate max_pipeline based on the set of paths that * are available 1/14/99 * and maybe we shouldn't do it on the last iteration * Also, skip paths if the corresponding range has been exhasted 1/19/99 */ max_pipeline = 0; max_latency = 0; if (verbose) printc ("Restarting max_pipeline at 0.\n"); for (path_ptr = node_ptr -> loop_paths -> path; path_ptr; path_ptr = path_ptr -> next) { if (path_ptr->path_type_mask == NONE) continue; if (! ignore_value && iterations_remaining > iterations_required && ! (path_ptr->required_iters_left > 0 || path_ptr->nonrequired_iters_left > 0) ) continue; if (! ignore_value && iterations_remaining == iterations_required && ! (path_ptr->required_iters_left > 0) ) continue; if (! ignore_value) { /* Addition by Sibin M on Sept 30, 2004. * Added the initial condition checking for validity of range_ptr, 'cos the find_paths_range() * function returns a '0' on not finding the range. */ range_ptr = find_paths_range (node_ptr, path_ptr); if ( (!range_ptr) || (range_ptr->max_iter == 0) ) continue; /* Addition by Sibin M on Sept 30, 2004. * Added the initial condition checking for validity of range_ptr, 'cos the find_paths_range() * function returns a '0' on not finding the range. */ } if (! ignore_value && infeasible_continue(node_ptr, path_ptr, WORST)) continue; Copy_Cache_List (&tmp_ptr, loop_fm_list); Copy_Cache_List (&tmp_fh_ptr, loop_fh_list); call_site = 'e'; /*time_path is called again ...5th time.. if cache status changed. max_pipeline is calculated again*/ if (need2calculate (path_ptr, tmp_ptr, tmp_fh_ptr, MISS, MISS)) { path_ptr->prev_fm_status = MISS; path_ptr->prev_fh_status = MISS; path_ptr->prev_fm_list = tmp_ptr; path_ptr->prev_fh_list = tmp_fh_ptr; if (iterations_handled == 0 && DO_DATA_CACHE) data_status = true; else data_status = false; Time_Path (inf, node_ptr, path_ptr, &tmp_ptr, &tmp_fh_ptr, WORST, MISS, MISS, 0, 0, NO_INST, NO_INST, data_status); if (verbose){ printf("JUST FINISHED A PATH WITH FOLLOWING INFO-\n ID = %d\n CYCLES = %d \n NAIVE = %d ", path_ptr->number, path_ptr->path_cycles, path_ptr->wc_pipe_nai_time); printf("call site %c %d",call_site,path_ptr->number); } temp_latency = path_ptr->total_latency; temp_extra_time = path_ptr->extra_time; } else { /*MARKED - NEED TO ADD VARIABLES BELOW*/ path_cycles = path_ptr->path_cycles; temp_latency = path_ptr->total_latency; temp_extra_time = path_ptr->extra_time; } if (PARAMETERIZE) { poss_update_equations (&max_pipeline, &max_latency,path_cycles, temp_latency); } else poss_update (&max_pipeline, path_cycles, "update max_pipeline"); } /* * We need to copy end_head to prev_end_head, initialize end_head. * For each available path below, we calculate the current union * in end_head. After that, we find structural hazards based on * the prev_end_head. 12/13/98 */ if (verbose) printc ("at beginning of phase 1, iterations_handled == %d\n" "about to copy end_head into prev_end_head.\n", iterations_handled); for (stage = 0; stage < NUM_STAGES; ++stage) { prev_end_head[stage] = end_head[stage]; end_head[stage] = NULL; } /* * 1. Compute iterations_remaining to simplify future comparisons. */ iterations_remaining = abs_max_iterations - iterations_handled; if (verbose) printc ("iterations remaining = %d\n", iterations_remaining); /*MARKED - NEED TO ADD VARIABLES BELOW*/ longest_path_time = 0; longest_path_latency = 0; longest_pipeline_time = 0; longest_pipeline_latency = 0; for (path_ptr = node_ptr -> loop_paths -> path; path_ptr; path_ptr = path_ptr -> next) { if (path_ptr->path_type_mask == NONE) continue; /* * 2. If iterations_remaining > iterations_required, * then we select WC continue path among the paths in * which either the required or nonrequired iters left > 0 * Otherwise we select WC continue path among the paths * whose required iters left > 0. * Also, skip paths if the corresponding range has been exhasted 1/19/99 */ if (! ignore_value && iterations_remaining > iterations_required && ! (path_ptr->required_iters_left > 0 || path_ptr->nonrequired_iters_left > 0) ) continue; if (! ignore_value && iterations_remaining == iterations_required && ! (path_ptr->required_iters_left > 0) ) continue; if (! ignore_value) { range_ptr = find_paths_range (node_ptr, path_ptr); /* Addition by Sibin M on Sept 30, 2004. * Added the initial condition checking for validity of range_ptr, 'cos the find_paths_range() * function returns a '0' on not finding the range. */ if( ( !range_ptr ) || (range_ptr->max_iter == 0) ) continue; /* End of addition by Sibin M on Sept 30, 2004. * Added the initial condition checking for validity of range_ptr, 'cos the find_paths_range() * function returns a '0' on not finding the range. */ } if (! ignore_value && infeasible_continue(node_ptr, path_ptr, WORST)) continue; Copy_Cache_List (&tmp_ptr, loop_fm_list); Copy_Cache_List (&tmp_fh_ptr, loop_fh_list); call_site = 'f'; if (need2calculate (path_ptr, tmp_ptr, tmp_fh_ptr, MISS, (iterations_handled == 0) ? HIT : MISS)) { path_ptr->prev_fm_status = MISS; path_ptr->prev_fh_status = (iterations_handled == 0) ? HIT : MISS; path_ptr->prev_fm_list = tmp_ptr; path_ptr->prev_fh_list = tmp_fh_ptr; if (iterations_handled == 0 && DO_DATA_CACHE) data_status = true; else data_status = false; Time_Path (inf, node_ptr, path_ptr, &tmp_ptr, &tmp_fh_ptr, WORST, MISS, (iterations_handled == 0) ? HIT : MISS, 0, 0, NO_INST, NO_INST, data_status); if (verbose) { printf("JUST FINISHED A PATH WITH FOLLOWING INFO-\n ID = %d\n CYCLES = %d \n NAIVE = %d ", path_ptr->number, path_ptr->path_cycles, path_ptr->wc_pipe_nai_time); printf("call site %c %d",call_site,path_ptr->number); } temp_latency = path_ptr->total_latency; temp_extra_time = path_ptr->extra_time; } else { /*MARKED - NEED TO ADD VARIABLES BELOW */ path_cycles = path_ptr->path_cycles; temp_latency = path_ptr->total_latency; temp_extra_time = path_ptr->extra_time; } if (PARAMETERIZE) { poss_update_equations(&longest_pipeline_time,&longest_pipeline_latency, path_cycles,temp_latency); } else poss_update (&longest_pipeline_time, path_cycles, ""); if (naive_too) { poss_update (&longest_pc_nai_path_time, path_ptr->wc_pc_nai_time, ""); poss_update (&longest_pipe_nai_path_time, path_ptr->wc_pipe_nai_time, ""); } #ifdef INTERFACE path_ptr->max_cycle_fmiss = path_cycles + path_ptr->extra_time; #endif /*MARKED - NEED TO ADD VARIABLES TO COMPARE 2 EQUATIONS BELOW */ take_branch = false; if (PARAMETERIZE) { take_branch = compare_equations(path_cycles, temp_latency + temp_extra_time, longest_path_time,longest_path_latency,&path_cond); } else if (path_cycles + path_ptr->extra_time > longest_path_time) take_branch = true; if (take_branch) { longest_path_ptr = path_ptr; if (PARAMETERIZE) { if (!path_cond) { longest_path_latency = path_ptr->total_latency + path_ptr->extra_time; longest_path_time = path_cycles ; } else { eq_merge(&longest_path_latency,&longest_path_time,path_ptr->total_latency + path_ptr->extra_time,path_cycles); } } else { longest_path_latency = path_ptr->total_latency + path_ptr->extra_time/miss_delay ; longest_path_time = path_cycles + path_ptr->extra_time; } /* we need to copy the fm list that Time_Path updated */ Copy_Cache_List ( ( &longest_path_ptr->worst_fm_list ), tmp_ptr); Copy_Cache_List ( (cache_block**)( &longest_path_ptr->worst_fh_list ), tmp_fh_ptr); } /* ============================================================= */ /* create a union of paths including this newly timed one (4/25) */ if (verbose) printc ("Create a union of paths including this newly timed one.\n"); /* update union's register info (started 4/30) */ for (i = 0; i < NUMREGS; ++i) { if ( ( union_release_time[i] == NEVER && path_ptr->release_time[i] != NEVER ) || (path_ptr->release_time[i] < union_release_time[i]) ) { union_release_time[i] = path_ptr->release_time[i]; } if ( ( union_src_needed[i] == NEVER && path_ptr->src_needed[i] != NEVER ) || (path_ptr->src_needed[i] < union_src_needed[i]) ) { union_src_needed[i] = path_ptr->src_needed[i]; union_src_stage[i] = path_ptr->src_stage[i]; } } if (verbose) print_reg_info(union_src_needed, union_src_stage, union_release_time); /* before getting the end_head of the union, check whether this path is longer than max_pipeline; if so increment the cycles assoc with the end cycles of the union so far determined (9/25) */ if (verbose) printc ("for path %d, path_cycles = %d while max_pipeline = %d\n", path_ptr->number, path_cycles, max_pipeline); /*MARKED - NEED TO ADD VARIABLES TO COMPARE 2 EQUATIONS BELOW */ take_branch = false; if (PARAMETERIZE) { take_branch = compare_equations(path_cycles, path_ptr->total_latency, max_pipeline, max_latency,&path_cond); } else if (path_cycles > max_pipeline) { take_branch = true; } if (take_branch) { /*SHOULD I PUT FREQUENCY MODEL VARIABLES HERE?*/ for (stage = 0; stage < NUM_STAGES; ++stage) { if (end_head[stage]) { end_head[stage]->cycles += path_cycles - max_pipeline; } } if (!path_cond) { max_pipeline = path_cycles; max_latency = path_ptr->total_latency; } else eq_merge(&max_latency, &max_pipeline, path_ptr->total_latency,path_cycles); if (verbose) printc ("max_pipeline becomes value of path_cycles: %d\n", max_pipeline); } /*MARKED - NEED TO ADD VARIABLES TO COMPARE 2 EQUATIONS BELOW */ for (stage = 0; stage < NUM_STAGES; ++stage) { if (beg_head[stage]) { if (path_ptr->beg_cycles[stage] < beg_head[stage]->cycles) { beg_head[stage]->cycles = path_ptr->beg_cycles[stage]; beg_head[stage]->occupant = path_ptr->beg_occupant[stage]; } } else if (path_ptr->beg_occupant[stage]) { memory_count += 12; beg_temp[stage] = (struct union_list_node *) malloc (sizeof (struct union_list_node)); beg_temp[stage]->occupant = path_ptr->beg_occupant[stage]; beg_temp[stage]->cycles = path_ptr->beg_cycles[stage]; beg_temp[stage]->next = beg_head[stage]; beg_head[stage] = beg_temp[stage]; } if (end_head[stage]) { end_head[stage]->occupant = path_ptr->end_occupant[stage]; /*MARKED- COMPARING 2 EQUATIONS BELOW*/ take_branch = false; if (PARAMETERIZE) { take_branch = compare_equations(max_pipeline, max_latency, path_cycles, path_ptr->total_latency,&path_cond); } else if (path_cycles < max_pipeline) take_branch = true; if (take_branch) { if (path_ptr->end_cycles[stage] + (max_pipeline - path_cycles) < end_head[stage]->cycles) end_head[stage]->cycles = path_ptr->end_cycles[stage] + (max_pipeline - path_cycles); } else { if (path_ptr->end_cycles[stage] < end_head[stage]->cycles) end_head[stage]->cycles = path_ptr->end_cycles[stage]; } /*IMPORTANT COMMENT */ /* (9/19) Note that we are adding max_pipeline - path_cycles to the end cycles of the path for the stage to account for the possiblility that some paths are longer than others */ if (take_branch) if (verbose) printc ("stage %d: I had to adjust end cycles by %d\n", stage, max_pipeline - path_cycles); } /*if (beg_head[stage]) */ else if (path_ptr->end_occupant[stage]) { memory_count += 12; end_temp[stage] = (struct union_list_node *) malloc (sizeof (struct union_list_node)); end_temp[stage]->occupant = path_ptr->end_occupant[stage]; /*MARKED- COMPARING 2 EQUATIONS BELOW*/ take_branch = false; if (PARAMETERIZE) { take_branch = compare_equations(max_pipeline, max_latency, path_cycles, path_ptr->total_latency,&path_cond); } else if (path_cycles < max_pipeline) take_branch = true; if (take_branch) end_temp[stage]->cycles = path_ptr->end_cycles[stage] + (max_pipeline - path_cycles); else end_temp[stage]->cycles = path_ptr->end_cycles[stage]; if (take_branch) if (verbose) printc ("stage %d: I had to adjust end cycles by %d\n", stage, max_pipeline - path_cycles); end_temp[stage]->next = end_head[stage]; end_head[stage] = end_temp[stage]; } } if (verbose) { printc("Here is the path's union after %d iterations of loop:\n", iterations_handled + 1); print_union (beg_head, end_head); } } /* end of timing all the continue paths */ if (verbose) { printc ("on iter %d, path %d is the longest\n", iterations_handled + 1, longest_path_ptr->number); printc ("longest pipeline time is %d\n", longest_pipeline_time); } /* * reduce the max_iter of the range corresponding to the longest path * 1/19/99 */ if (! ignore_value) { for (range_ptr = node_ptr->range_list; range_ptr; range_ptr = range_ptr->next) if (range_ptr->number == longest_path_ptr->range_index) break; /* Addition by Sibin M on Sept 30, 2004. * Check to see if the range_ptr is a valid value. * The enclosing "if" condition added newly. */ if( range_ptr ) { --range_ptr->max_iter; if (verbose) printc ("about to reduce range %d by one to %d\n", range_ptr->number, range_ptr->max_iter); } /* End of addition by Sibin M on Sept 30, 2004. * Check to see if the range_ptr is a valid value. * The enclosing "if" condition added newly. */ } fm_encountered = longest_path_ptr->fm_encountered; fh_encountered = longest_path_ptr->fh_encountered; /* ================================================================== */ /* accumulate total_time, taking this new wc path as a next iteration */ if (verbose) { printc ("checking for structural hazard for phase 1.\n" "Here is union based on beg_head & prev_end_head:\n"); print_union (beg_head, prev_end_head); } if (iterations_handled == 0 || only_fetch || cache_only) { total_time += longest_path_ptr->path_cycles; total_latency += longest_path_ptr->total_latency ;//+longest_path_ptr->dcache_misses; total_icache += longest_path_ptr->icache_misses; total_dcache += longest_path_ptr->dcache_misses; total_overlap += longest_path_ptr->overlap; avg_total_time += longest_path_ptr->path_cycles; avg_total_latency += longest_path_ptr->total_latency; if (visa_worst_case < longest_path_ptr->path_cycles) { //printf("1 %d %d %d %d\n",visa_worst_case,longest_path_ptr->path_cycles,iterations_handled,total_time); visa_worst_case = longest_path_ptr->path_cycles; } } else { /* * Let's examine the end cycles of the loop before this iteration, * and the starting and finishing times of the stages of this * new iteration. 2/20/96 */ /*MARKED - NEED TO ADD VARIABLES TO COMPARE 2 EQUATIONS BELOW */ /* Added by Sibin M on April 27, 2004. * Check to see if any of the instructions in the path * may have a variable number of cycles( eg. : sbrs, sbis... ). * Make corresponding adjustments. */ variable_cycles = adjust_for_variable_cycles( inf, node_ptr->loop_paths->path, longest_path_ptr, end_head, prev_end_head, path_cycles, max_pipeline ) ; // Now deduct the excess time from total_time, max_pipeline, path_cycles, longest_pipeline_time, longest_poth_time. total_time -= variable_cycles ; path_cycles -= variable_cycles ; max_pipeline -= variable_cycles ; longest_pipeline_time -= variable_cycles ; longest_path_time -= variable_cycles ; /* End of Addition by Sibin M on April 27, 2004. */ for (i = 0; i < NUM_STAGES; ++i) { if (! prev_end_head[i]) prev_iter_done[i] = 0; else prev_iter_done[i] = total_time - prev_end_head[i]->cycles ; // + 1; /* Above change made by Sibin M on April 28, 2004. * Removed the +1 at the end of the calculation for prev_iter_done. * Is this correct though ???? - Check this up again. */ new_iter_start[i] = longest_path_ptr->beg_cycles[i]; if (longest_path_ptr->end_cycles[i] == 0) new_iter_done[i] = 0; else new_iter_done[i] = longest_path_ptr->path_cycles - longest_path_ptr->end_cycles[i] ; // + 1; /* Above change made by Sibin M on April 28, 2004. * Removed the +1 at the end of the calculation for new_iter_done. * Is this correct though ???? - Check this up again. */ if (! prev_end_head[i] || new_iter_start[i] == 0) sums[i] = 0; else sums[i] = prev_end_head[i]->cycles + new_iter_start[i]; } smallest_sum = smallest_entry (sums, 0, NUM_STAGES); /*MARKED - WEIRD HANDLING OF SPECIAL CASE */ if (smallest_sum < 6) { if (verbose) printc ("Adding %d to prev total time, struct haz with prev iter\n", 6 - smallest_sum); total_time += 6 - smallest_sum; if (iterations_handled < max_iterations) avg_total_time += 6 - smallest_sum; visa_worst_case += 6 - smallest_sum; } if (verbose) { printc ("prev total time = %d\n", total_time); printc ("new iter time = %d\n", longest_path_ptr->path_cycles); print_array (prev_iter_done, 0, NUM_STAGES, "prev iter done"); print_array (new_iter_start, 0, NUM_STAGES, "new iter start"); print_array (new_iter_done, 0, NUM_STAGES, "new iter done "); print_array (sums, 0, NUM_STAGES, "sums "); printc ("smallest sum is %d\n", smallest_sum); } /* * find the difference between the max and the min of prev_iter_done * and the max of new_iter_done */ if (verbose) { printc ("the range of prev iter done is %d\n", largest_entry (prev_iter_done, 0, NUM_STAGES) - smallest_entry(prev_iter_done, 0, NUM_STAGES)); printc ("max of new_iter_done is %d\n", largest_entry (new_iter_done, 0, NUM_STAGES)); } range_prev_iter_done = largest_entry (prev_iter_done, 0, NUM_STAGES) - smallest_entry(prev_iter_done, 0, NUM_STAGES); max_new_iter_done = largest_entry (new_iter_done, 0, NUM_STAGES); /* * Need to generalize this, and probably apply it elsewhere 12/13/98 */ /* if (max_new_iter_done > range_prev_iter_done) */ incr = incr_new_total_time (total_time, prev_iter_done, new_iter_start, new_iter_done); total_time += incr; /*bruhaha*/ if (visa_worst_case < incr) { visa_worst_case = incr; } /*SINCE WE WANT THE D$ ACCESSES TO BE FIRST MISSES, LET US SUBTRACT THE D$ ACCESSES SEEN IN THE 2nd ITERATION ONWARDS*/ total_latency += longest_path_ptr->total_latency;//+longest_path_ptr->dcache_misses; total_icache += longest_path_ptr->icache_misses; total_dcache += longest_path_ptr->dcache_misses; total_overlap += longest_path_ptr->overlap; if (iterations_handled < max_iterations) { avg_total_time += incr; avg_total_latency += longest_path_ptr->total_latency; } /* total_time += longest_path_ptr->path_cycles - 4; if (iterations_handled < max_iterations) avg_total_time += longest_path_ptr->path_cycles - 4; */ } total_extra_fm_time += longest_path_ptr->extra_fm_time; total_extra_fh_time += longest_path_ptr->extra_fh_time; /* * keep track of the shared fm time 2/9/99 */ poss_update (&total_shared_fm_time, longest_path_ptr->shared_fm_time, "total_shared_fm_time"); /*MARKED - NEED TO ADD VARIABLES TO COMPARE 2 EQUATIONS BELOW */ /*NOTE- CHECK WHATIS DONE BELOW */ take_branch = true; if (PARAMETERIZE) { take_branch = compare_equations(longest_pipeline_time, longest_pipeline_latency, longest_path_ptr->path_cycles, longest_path_ptr->total_latency ,&path_cond); } else if (longest_pipeline_time > longest_path_ptr->path_cycles) take_branch = true; if (take_branch) { if (verbose) printc ("HEY - we need to adjust the pipeline & extra times\n"); diff = longest_pipeline_time - longest_path_ptr->path_cycles; total_time += diff; visa_worst_case +=diff; if (PARAMETERIZE) { diff = longest_pipeline_latency - longest_path_ptr->total_latency; total_latency += diff; temp_diff += diff; } else { total_extra_fm_time -= diff; if (total_extra_fm_time < 0) total_extra_fm_time = 0; /* can't be negative: fixed 1/29/99 */ } } /*MARKED - WRAP AROUND CACHE STUFF.. IGNORED */ #ifdef TRASH /* if (wrap_around && iterations_handled == 0) { total_first_line_delay = longest_path_ptr->first_line_delay; if (verbose) { printc ("Here is longest path's delay after fm list...\n "); print_delay_after_fm_list (longest_path_ptr->delay_after_fm_head); } for (delay_after_fm_temp = longest_path_ptr->delay_after_fm_head; delay_after_fm_temp; delay_after_fm_temp = delay_after_fm_temp->next) { if (verbose) printc ("examining inst %d\n", delay_after_fm_temp->inst); cblock = find_fm_cblock (delay_after_fm_temp->inst, longest_path_ptr->worst_fm_list); if (!cblock || cblock->cat_list->worst_case == 'm') { if (verbose) printc ("add %d to total time & take it from delay after fm\n", delay_after_fm_temp->cycles); total_time += delay_after_fm_temp->cycles; longest_path_ptr->delay_after_fm = longest_path_ptr->delay_after_fm - delay_after_fm_temp->cycles; } else { if (verbose) printc ("status quo - keep %d cycles with delay after fm\n", delay_after_fm_temp->cycles); total_delay_after_fm = longest_path_ptr->delay_after_fm; } } } else if (wrap_around) { if (verbose){ printc ("Here is longest path's delay after fm list...\n "); print_delay_after_fm_list (longest_path_ptr->delay_after_fm_head); } for (delay_after_fm_temp = longest_path_ptr->delay_after_fm_head; delay_after_fm_temp; delay_after_fm_temp = delay_after_fm_temp->next) { if (verbose) printc ("examining inst %d\n", delay_after_fm_temp->inst); cblock = find_fm_cblock (delay_after_fm_temp->inst, longest_path_ptr->worst_fm_list); if (!cblock || cblock->cat_list->worst_case == 'm') { if (verbose) printc ("add %d to total time & take it from delay after fm\n", delay_after_fm_temp->cycles); total_time += delay_after_fm_temp->cycles; } } } */ #endif /* ================================================================ */ /* concatenate this new wc path onto the pipeline (NB data hazards) */ if (iterations_handled == 0) /* then loop is just 1 iter so far */ { for (stage = 0; stage < NUM_STAGES; ++stage) { loop_beg_head[stage] = beg_head[stage]; loop_end_head[stage] = end_head[stage]; } } else { /* need to look for data hazards between loop_end_head and the new path's beg_head. */ delay = 0; find_reg_delay (&delay, union_release_time, union_src_needed); if (verbose) printc ("delay from iter %d to %d of loop is %d\n", iterations_handled, iterations_handled + 1, delay); total_time += delay; visa_worst_case += delay; if (iterations_handled < max_iterations) avg_total_time += delay; } /* now copy the path's end_head to loop_end_head */ /* What if this path doesn't use some stages that have been used before? I think we need to update the cycle #'s for the neglected stages. */ /* * Also, don't copy the end_head's stage if it is empty, so that * we will not be erasing some information from another path that * is already in loop_end_head, like a floating-point stage. 12/13/98 */ if (verbose) { printc ("The following end_head will be copied to loop_end_head\n" " (ignore the begin info)\n"); print_union (beg_head, end_head); } for (stage = 0; stage < NUM_STAGES; ++stage) { if (end_head[stage]) loop_end_head[stage] = end_head[stage]; } /* ================================================================ * Dectect possibility of delay on 2nd iteration when (near) end of * 1st iteration brings in a cache line. 2/5/96. */ /*MARKED - WRAP AROUND CACHE INFO .. IGNORED WRT FREQUENCY MODEL*/ #ifdef TRASH /* if (wrap_around) if (verbose) print_cache_info(longest_path_ptr); */ if (wrap_around && iterations_handled == 0) { /* use end avail time or fm end avail time, whichever is more * recent. 2/26/96 * replaced loop_end_head[IF]->cycles with * longest_path_ptr->end_cycles[IF] 2/27/96 */ if (longest_path_ptr->end_avail_time[0] > longest_path_ptr->fm_end_avail_time[0]) { /* * a different approach for the case where we are * dealing with same line 3/11/96 */ if (longest_path_ptr->beg_cache_line == longest_path_ptr->end_cache_line) { diff = longest_path_ptr->end_avail_time [longest_path_ptr->first_word_num] - (longest_path_ptr->path_cycles - longest_path_ptr->end_cycles[IF] + 2); } else { diff = largest_entry (longest_path_ptr->end_avail_time, 0, words_per_line) - (longest_path_ptr->path_cycles - longest_path_ptr->end_cycles[IF] + 1); } } else { if (longest_path_ptr->beg_cache_line == longest_path_ptr->end_cache_line) { diff = longest_path_ptr->fm_end_avail_time [longest_path_ptr->first_word_num] - (longest_path_ptr->path_cycles - longest_path_ptr->end_cycles[IF] + 2); } else { diff = largest_entry (longest_path_ptr->fm_end_avail_time, 0, words_per_line) - (longest_path_ptr->path_cycles - longest_path_ptr->end_cycles[IF] + 1); } } if (verbose) printc ("diff = %d\n", diff); poss_update (&diff, 0, "diff can't be negative"); node_ptr->last_fm = longest_path_ptr->last_fm; /* * Accumulate the last line delay we just discovered with the rest * of it (accrued from deeper levels). 2/16/96 */ if (longest_path_ptr->end_avail_time[0] > longest_path_ptr->fm_end_avail_time[0]) { total_last_line_delay = diff + longest_path_ptr->last_line_delay; } else { total_last_line_delay = diff + longest_path_ptr->last_line_delay; } /* something else to add on 1st iteration only 3/4/96*/ total_fm_dead_cycles += longest_path_ptr->fm_dead_cycles; } else if (wrap_around) { /* if iterations_handled > 0 */ /* do the same here as we did for the steady state iterations * below 3/1/96 */ if (longest_path_ptr->beg_cache_line == longest_path_ptr->end_cache_line) { diff = longest_path_ptr->end_avail_time [longest_path_ptr->first_word_num] - (longest_path_ptr->path_cycles - longest_path_ptr->end_cycles[IF] + 2); } else { diff = largest_entry (longest_path_ptr->end_avail_time, 0, words_per_line) - (longest_path_ptr->path_cycles - longest_path_ptr->end_cycles[IF] + 1); } if (verbose) printc ("diff = %d\n", diff); poss_update (&diff, 0, "diff can't be negative"); /* At this point we should be able to recognize that 16 is in last fm and is in fm list 3/1/96 */ cblock = find_fm_cblock (node_ptr->last_fm, longest_path_ptr->worst_fm_list); /* I'm adding the last line delay into 2 different places. 3/7/96 */ if (!cblock || cblock->cat_list->worst_case == 'm' || longest_path_ptr->last_miss_cat == 'm' /* or the reason of last line delay is an always miss */ ) { total_time += diff; visa_worst_case +=diff; if (verbose) { printc ("I just added %d to total pipeline time\n", diff); print_array (prev_iter_done, 0, NUM_STAGES, "prev iter done"); print_array (new_iter_start, 0, NUM_STAGES, "new iter start"); } for (i = 0; i < NUM_STAGES; ++i) diffs[i] = prev_iter_done[i] - new_iter_start[i]; if (verbose) printc ("should I reduce by max diff %d + the delay %d?\n", largest_entry (diffs, 0, NUM_STAGES) - diffs[0], delay); reduce = largest_entry (diffs, 0, NUM_STAGES) - diffs[0] + delay; /* I should be able to overlap somehow 3/20/96 */ if (reduce > 0 && reduce < diff) { if (verbose) printc ("I'm taking %d away from total pipeline time.\n", reduce); total_time -= reduce; } else if (reduce >= diff) { if (verbose) printc("I'm taking the whole diff %d from total pipeline time.\n", diff); total_time -= diff; } if (!cblock || cblock->cat_list->worst_case == 'm' ) total_last_line_delay += longest_path_ptr->last_line_delay; } } #endif /*WRAP AROUND STUFF ENDS HERE */ if (verbose){ print_components (iterations_handled + 1, total_time, total_extra_fh_time, total_extra_fm_time, total_shared_fm_time, total_first_line_delay, total_delay_after_fm, total_last_line_delay, total_fm_dead_cycles); printc ("Here is the loop's union so far:\n"); print_union (loop_beg_head, loop_end_head); } /* ====================================================== */ /* update loop's fm list using the longest path's fm list */ Union_Cache_List (&loop_fm_list, longest_path_ptr->worst_fm_list); Union_Cache_List (&loop_fh_list, (struct cache_block*)( longest_path_ptr->worst_fh_list ) ) ; /* * 3. Keep track of this iteration that is being handled. */ ++iterations_handled; /* * 4. Decrement total and path's iterations required if this * path is required to execute. */ if (longest_path_ptr->required_iters_left > 0) { --iterations_required; --longest_path_ptr->required_iters_left; } else { --longest_path_ptr->nonrequired_iters_left; } if (verbose) printc ("Longest path available is path %d. After taking it,\n" " iterations_handled = %d\n" " iterations_required = %d\n" " longest path's required iters left = %d\n" " longest path's nonrequired iters left = %d\n", longest_path_ptr->number, iterations_handled, iterations_required, longest_path_ptr->required_iters_left, longest_path_ptr->nonrequired_iters_left); /* * 5. We break out of the do-while if we have no more fm or fh * or we run out of iterations (before the exit). */ } while (iterations_handled < max_iterations && ( fm_encountered > 0 || (iterations_handled == 1 && fh_encountered > 0) ) ); if (naive_too) { total_pc_nai_time = (max_iterations) * longest_pc_nai_path_time; total_pipe_nai_time = (max_iterations) * longest_pipe_nai_path_time; } /* here initialize abs_total_time to be the time of the iterations only encountered for the absolute max number of iterations 24 sept 97 */ /*MARKED - MIGHT HAVE TO ADD FREQ MODEL STUFF HERE */ if (abs_max_iterations > max_iterations) { abs_total_time = (longest_path_ptr->path_cycles - 4) * (abs_max_iterations - iterations_handled); abs_total_latency = (longest_path_ptr->total_latency) * (abs_max_iterations - iterations_handled); } if (verbose) { printc ("\nTime for replication if iterations_handled < "); printc ("max_iterations or exit path.\n"); printc ("iterations_handled = %d, max_iterations = %d\n", iterations_handled, max_iterations); printc ("initializing abs_total_time to %d\n", abs_total_time); } /* ================================================================= * This is the second phase of the loop analysis algorithm. * replicate last wc path for remaining iterations, except the n-th. * Note that any inter-iteration data hazard will be constant. * and update total_time, wary of possible overlap. */ if (verbose) printc ("BEGIN phase 2\n"); if (iterations_handled < max_iterations - 1) { /* * 1. number of iterations left before the exit path is * max_iterations - iterations_handled */ iterations_remaining = abs_max_iterations - iterations_handled; if (verbose) printc ("iterations remaining (before exit) = %d\n", iterations_remaining); /* * 2. Number of iterations to do for this replication. * If the longest path's required iters left > 0 then * iterations_to_do = longest path's required iters left * decrement amount = iterations_to_do * Reduce the iterations required by the decrement amount. * Set the longest path's required iters left to zero. * Otherwise * iterations to do = smaller of: * longest path's nonrequired iters left AND * iterations_remaining - iterations_required * Reduce the longest path's nonrequired iters left by this # */ if (longest_path_ptr->required_iters_left > 0) { iterations_to_do = longest_path_ptr->required_iters_left; iterations_required -= iterations_to_do; longest_path_ptr->required_iters_left = 0; if (verbose) printc ("longest path is %d\n" "Using up the longest path's required iterations.\n" " iterations to do = %d\n" " iterations still required = %d\n", longest_path_ptr->number, iterations_to_do, iterations_required); } else { iterations_to_do = Min (longest_path_ptr->nonrequired_iters_left, iterations_remaining - iterations_required); longest_path_ptr->nonrequired_iters_left -= iterations_to_do; if (verbose) printc ("longest path is %d\n" "longest path has no more required iterations.\n" " longest path has %d nonrequired iters left\n" " iterations to do = %d\n" " longest path's nonrequired iter left = %d\n", longest_path_ptr->number, longest_path_ptr->nonrequired_iters_left, iterations_to_do, longest_path_ptr->nonrequired_iters_left); } /* * Also appropriate to copy end_head into prev_end_head so we will have * the latest union to detect structural hazards. 12/13/98 * We should do this after we know how many iters to do, since * if we have no iters to do, we are not updating what the previous * path was (we are not taking any new path yet). */ if (iterations_to_do == 0) continue; /* quit phase 2; go back to phase 1 */ for (stage = 0; stage < NUM_STAGES; ++stage) { prev_end_head[stage] = end_head[stage]; } /* * reduce the range's max_iter by iterations_to_do 1/19/99 */ if (! ignore_value) { for (range_ptr = node_ptr->range_list; range_ptr; range_ptr = range_ptr->next) if (range_ptr->number == longest_path_ptr->range_index) break; /* Addition by Sibin M on Sept 30, 2004. * Check to see if the range_ptr is a valid value. * The enclosing "if" condition added newly. */ if( range_ptr ) { range_ptr->max_iter -= iterations_to_do; if (verbose) printc ("I just reduced range %d max_iter to %d.\n", range_ptr->number, range_ptr->max_iter); } /* Addition by Sibin M on Sept 30, 2004. * Check to see if the range_ptr is a valid value. * The enclosing "if" condition added newly. */ } /* * 3. We also need to modify this second phase of the algorithm * so that we are replicating iterations_to_do iterations, * not max_iterations - iterations_handled */ /*MARKED - */ if (only_fetch || cache_only) { total_time += longest_path_ptr->path_cycles * iterations_to_do; total_latency += longest_path_ptr->total_latency * iterations_to_do; total_icache += longest_path_ptr->icache_misses * iterations_to_do;; total_dcache += longest_path_ptr->dcache_misses * iterations_to_do;; total_overlap += longest_path_ptr->overlap *iterations_to_do; temp_diff += longest_path_ptr->diff *iterations_to_do; if (visa_worst_case < longest_path_ptr->path_cycles){ visa_worst_case= longest_path_ptr->path_cycles; } if (iterations_handled < max_iterations) { avg_total_time += longest_path_ptr->path_cycles * Min(iterations_to_do, max_iterations - iterations_handled); avg_total_latency += longest_path_ptr->total_latency *Min(iterations_to_do, max_iterations - iterations_handled); } } else { /* * Let's examine the end cycles of the loop before this iteration, * and the starting and finishing times of the stages of this * new iteration. 2/20/96 */ /* Added by Sibin M on April 27, 2004. * Check to see if any of the instructions in the path * may have a variable number of cycles( eg. : sbrs, sbis... ). * Make corresponding adjustments. */ variable_cycles = adjust_for_variable_cycles( inf, node_ptr->loop_paths->path, longest_path_ptr, end_head, prev_end_head, path_cycles, max_pipeline ) ; // Now deduct the excess time from total_time, max_pipeline, path_cycles, longest_pipeline_time, longest_poth_time. total_time -= variable_cycles ; path_cycles -= variable_cycles ; max_pipeline -= variable_cycles ; longest_pipeline_time -= variable_cycles ; longest_path_time -= variable_cycles ; /* End of Addition by Sibin M on April 27, 2004. */ if (verbose) { printc ("checking for structural hazard for phase 2.\n" "Here is union based on beg_head & prev_end_head:\n"); print_union (beg_head, prev_end_head); } for (i = 0; i < NUM_STAGES; ++i) { if (! prev_end_head[i]) prev_iter_done[i] = 0; else prev_iter_done[i] = total_time - prev_end_head[i]->cycles ; // + 1; /* Above change made by Sibin M on April 28, 2004. * Removed the +1 at the end of the calculation for prev_iter_done. * Is this correct though ???? */ new_iter_start[i] = longest_path_ptr->beg_cycles[i]; if (longest_path_ptr->end_cycles[i] == 0) new_iter_done[i] = 0; else new_iter_done[i] = longest_path_ptr->path_cycles - longest_path_ptr->end_cycles[i] ; // + 1; /* Above change made by Sibin M on April 28, 2004. * Removed the +1 at the end of the calculation for new_iter_done. * Is this correct though ???? - Check this up again. */ if (! prev_end_head[i] || new_iter_start[i] == 0) sums[i] = 0; else sums[i] = prev_end_head[i]->cycles + new_iter_start[i]; } smallest_sum = smallest_entry (sums, 0, NUM_STAGES); /*MAYBE SHOULD USE THE FREQUENCY MODEL IN THE IF BLOCK BELOW... BUT THE IF BLOCK IS ALMOST NEVER EXECUTED */ /* Comment by Sibin M - April 18, 2004. * Why is this number 6 here ???? - Check this up again. * What is it's significance ? */ if (smallest_sum < 6) { if (verbose) printc("Adding %d to prev total time, struct haz with prev iters\n", (6 - smallest_sum) * iterations_to_do); total_time += (6 - smallest_sum) * iterations_to_do; visa_worst_case += 6 - smallest_sum; if (iterations_handled < max_iterations) avg_total_time += (6 - smallest_sum) * Min (iterations_to_do, max_iterations - iterations_handled); } if (verbose) { printc ("prev total time = %d\n", total_time); printc ("new iter time = %d\n", longest_path_ptr->path_cycles); print_array (prev_iter_done, 0, NUM_STAGES, "prev iter done"); print_array (new_iter_start, 0, NUM_STAGES, "new iter start"); print_array (new_iter_done, 0, NUM_STAGES, "new iter done "); print_array (sums, 0, NUM_STAGES, "sums "); printc ("smallest sum is %d\n", smallest_sum); } incr = incr_new_total_time (total_time, prev_iter_done, new_iter_start, new_iter_done); total_time += incr * iterations_to_do; if (visa_worst_case < incr) { visa_worst_case = incr ; } total_latency += longest_path_ptr->total_latency * iterations_to_do; total_icache += longest_path_ptr->icache_misses * iterations_to_do;; total_dcache += longest_path_ptr->dcache_misses * iterations_to_do;; total_overlap += longest_path_ptr->overlap * iterations_to_do; temp_diff += longest_path_ptr->diff *iterations_to_do; if (iterations_handled < max_iterations) { avg_total_time += incr * Min (iterations_to_do, max_iterations - iterations_handled); avg_total_latency += longest_path_ptr->total_latency * Min (iterations_to_do, max_iterations - iterations_handled); } /* Let's look for a delay between steady state iterations just * as we did after the 1st. 2/26/96 */ /*MARKED - WRAP AROUND CACHE INFO .. IGNORED WRT FREQUENCY MODEL*/ #ifdef TRASH if (wrap_around) { if (verbose) print_cache_info(longest_path_ptr); /* new code for adding inner delay after fm to total time if the fm that caused delay is now an always miss 3/18/96 */ if (verbose) { printc ("Here is longest path's delay after fm list...\n "); print_delay_after_fm_list(longest_path_ptr->delay_after_fm_head); } for (delay_after_fm_temp = longest_path_ptr->delay_after_fm_head; delay_after_fm_temp; delay_after_fm_temp = delay_after_fm_temp->next) { if (verbose) printc ("examining inst %d\n", delay_after_fm_temp->inst); cblock = find_fm_cblock (delay_after_fm_temp->inst, longest_path_ptr->worst_fm_list); if (!cblock || cblock->cat_list->worst_case == 'm') { if (verbose) printc ("add %d to total time & take it from delay after fm\n", delay_after_fm_temp->cycles); total_time += delay_after_fm_temp->cycles * iterations_to_do; } } if (longest_path_ptr->beg_cache_line == longest_path_ptr->end_cache_line) { diff = longest_path_ptr->end_avail_time [longest_path_ptr->first_word_num] - (longest_path_ptr->path_cycles - longest_path_ptr->end_cycles[IF] + 2); } else { diff = largest_entry (longest_path_ptr->end_avail_time, 0, words_per_line) - (longest_path_ptr->path_cycles - longest_path_ptr->end_cycles[IF] + 1); } if (verbose) printc ("diff = %d\n", diff); poss_update (&diff, 0, "diff can't be negative"); /* * Accumulate the last line delay we just discovered with the rest * of it (accrued from deeper levels). */ cblock = find_fm_cblock (node_ptr->last_fm, longest_path_ptr->worst_fm_list); if (!cblock || cblock->cat_list->worst_case == 'm' || longest_path_ptr->last_miss_cat == 'm') { /*INSIDE THE TRASH BLOCK */ total_time += diff * iterations_to_do; visa_worst_case += diff; /* here too 3/20/96 */ if (verbose) { printc ("I just added %d to total pipeline time\n", diff * iterations_to_do); print_array (prev_iter_done, 0, NUM_STAGES, "prev iter done"); print_array (new_iter_start, 0, NUM_STAGES, "new iter start"); } for (i = 0; i < NUM_STAGES; ++i) diffs[i] = prev_iter_done[i] - new_iter_start[i]; if (verbose) printc ("should I reduce by max diff %d + delay %d?\n", largest_entry (diffs, 0, NUM_STAGES) - diffs[0], delay); reduce = largest_entry (diffs, 0, NUM_STAGES) - diffs[0] + delay; /* I should be able to overlap somehow 3/20/96 */ if (reduce > 0 && reduce < diff) { if (verbose) printc ("I'm taking %d away from total pipeline time.\n", reduce * iterations_to_do); total_time -= reduce * iterations_to_do; } else if (reduce >= diff) { if (verbose) printc ("I'm taking whole diff %d from total pipeline time.\n", diff * iterations_to_do); total_time -= diff * iterations_to_do; } if (!cblock || cblock->cat_list->worst_case == 'm' ) total_last_line_delay += longest_path_ptr->last_line_delay * iterations_to_do; } /* end of : if (!cblock ... ) */ } /* end of : if (wrap_around) */ #endif } /* end of : else - the alternative to if (only_fetch) */ /* find the delay between 2 consecutive iterations */ /*MARKED - */ delay = 0; find_reg_delay (&delay, union_release_time, union_src_needed); total_time += delay * iterations_to_do; visa_worst_case += delay; /* * 4. Update the number of iterations handled. */ iterations_handled += iterations_to_do; if (verbose) printc ("delay between steady-state iterations = %d\n", delay); /* We don't need to update loop_end_head because we've reached a steady state. But again I think we need to update any stages that may have become lonely (they are farther back in time now) */ if (verbose){ print_components (iterations_handled, total_time, total_extra_fh_time, total_extra_fm_time, total_shared_fm_time, total_first_line_delay, total_delay_after_fm, total_last_line_delay, total_fm_dead_cycles); printc ("Here is the loop's union at end of a phase 2.\n"); print_union (loop_beg_head, loop_end_head); } } /* end of replication */ } /* end of while-loop around first 2 phases */ /* =================================================================== */ for (exit_block_ptr = node_ptr->exit_list; exit_block_ptr; exit_block_ptr = exit_block_ptr->next) { if (naive_too) { if (verbose) { printc ("total wc p&c naive time = %d\n", total_pc_nai_time); printc ("total wc pipe naive time = %d\n", total_pipe_nai_time); } } /* * Plan: When we use the average number of iterations and * max_iterations is really less than abs_max_iterations, * store the avg_total_time to represent the average time. 4/18/99 */ if (verbose){ printc ("avg_total_time = %d\n", avg_total_time); print_components (max_iterations, total_time, total_extra_fh_time, total_extra_fm_time, total_shared_fm_time, total_first_line_delay, total_delay_after_fm, total_last_line_delay, total_fm_dead_cycles); } /* * Find the smallest value of end head cycle 2/23/01 * And enforce a rule that the smallest value must be 1. * (avoids underestimation to stats1, also adds 36 to lu100) */ min_value = 0x7fffffff; for (i = 0; i < NUM_STAGES; ++i) if (loop_end_head[i] && loop_end_head[i]->cycles < min_value) min_value = loop_end_head[i]->cycles; if (verbose) printc ("smallest end head cycles is %d\n", min_value); /*BOOKMARK- MARKED- AGAIN THE END CYCLES ARE BEING MESSSED UP... AM I DOING THE RIGHT THING HERE ?*/ for (i = 0; i < NUM_STAGES; ++i) if (loop_end_head[i]) loop_end_head[i]->cycles -= (min_value - 1); if (verbose){ printc ("Here is the loop's union about to be copied to exit block ptr\n" " as in exit_block_ptr->worst_case->end_head[stage]:\n"); print_union (loop_beg_head, loop_end_head); } /* * use the average total time when it is appropriate 5/15/99 */ if (! traditional) { /* * Find the loop element corresponding to this node. */ loop_ele_ptr = Find_Loop (inf, node_ptr->loop_header_block); /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ #ifdef __PARAMETRIC_TA__ /* TODO: If we have a parametric loop, and the inner loops need to be averaged, * then the polynomial_worst_case field needs to contain information * about this avg_total_time. * There should be an analagous assignment statement for * total_time = avg_total_time; (8/12/2004). */ #endif // __PARAMETRIC_TA__ /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ if (loop_ele_ptr->need_to_average) { total_time = avg_total_time; total_latency = avg_total_latency; if (verbose) printc ("Iterations are averaged; use average total time.\n"); } } /* store loop's union in exit block pointer and node ptr */ for (stage = 0; stage < NUM_STAGES; ++stage) { exit_block_ptr->worst_case->beg_head[stage] = loop_beg_head[stage]; exit_block_ptr->worst_case->end_head[stage] = loop_end_head[stage]; node_ptr -> wc_beg_head[stage] = loop_beg_head[stage]; node_ptr -> wc_end_head[stage] = loop_end_head[stage]; } /* the NODE needs the worst time; the exit block needs all times */ total_extra_time = (total_extra_fh_time > total_extra_fm_time) ? total_extra_fh_time : total_extra_fm_time; /* complete computation of abs_total_time, used in cases like sort. */ /* * I suspect it is wrong to accumulate total times. For instance, * we could have a function with 2 exit blocks. Don't add their * abs_total_time += total_time + total_extra_time; * times together: it doesn't make sense. 12/12/98 */ /*MARKED - CHANGES DUE TO PARAMETERIA.... */ if (PARAMETERIZE) { poss_update_equations (&abs_total_time, &abs_total_latency, total_time,total_latency + total_extra_time); } else poss_update (&abs_total_time, total_time + total_extra_time, "abs_total_time"); /*MARKED - WRAP AROUND CACHE INFO .. IGNORED WRT FREQUENCY MODEL*/ #ifdef TRASH /* if (wrap_around) abs_total_time += total_first_line_delay + total_delay_after_fm + total_last_line_delay + total_fm_dead_cycles; */ #endif if (verbose) { printc ("total_time = %d, total_extra_time = %d\n", total_time, total_extra_time); printc ("abs_total_time is now %d\n", abs_total_time); } if (naive_too) { poss_update (&node_ptr->wc_pc_nai_time, total_pc_nai_time, ""); poss_update (&node_ptr->wc_pipe_nai_time, total_pipe_nai_time, ""); } /*MARKUP- LOOKS LIKE HAVE TO COMPARE 2 EQUATIONS*/ take_branch = false; if (PARAMETERIZE) { take_branch = compare_equations(total_time, total_extra_time+total_latency, node_ptr->wc_pipeline_time , node_ptr->extra_time +node_ptr->total_latency, &path_cond); } else if (total_time + total_extra_time > node_ptr -> wc_pipeline_time + node_ptr -> extra_time) take_branch = true; if (take_branch) { if (naive_too) node_ptr->wc_pc_nai_time = total_pc_nai_time; node_ptr->wc_abs_time = abs_total_time; node_ptr->extra_time = total_extra_time; node_ptr->extra_fh_time = total_extra_fh_time; node_ptr->extra_fm_time = total_extra_fm_time; if (!path_cond) { node_ptr->wc_pipeline_time = total_time; node_ptr->total_latency = total_latency; } else eq_merge(&node_ptr->total_latency,&node_ptr->wc_pipeline_time,total_latency, total_time); /*MARKED- NEW VARIABLE... THANKS TO FREQUENCY MODEL */ node_ptr->abs_total_latency = abs_total_latency; node_ptr->latency_considered = total_latency; node_ptr->icache_misses = total_icache; node_ptr->dcache_misses = total_dcache; node_ptr->overlap = total_overlap; node_ptr->diff = temp_diff; node_ptr->visa_worst_case = visa_worst_case; // printf("final %d %d %d\n",total_latency,total_icache,total_dcache); node_ptr->first_line_delay = total_first_line_delay; node_ptr->delay_after_fm = total_delay_after_fm; node_ptr->last_line_delay = total_last_line_delay; node_ptr->fm_dead_cycles = total_fm_dead_cycles; node_ptr->shared_fm_time = total_shared_fm_time; node_ptr->wc_first_prog_offset = longest_path_ptr->first_prog_offset; node_ptr->wc_beg_word_num = longest_path_ptr->beg_word_num; node_ptr->wc_beg_cache_line = longest_path_ptr->beg_cache_line; node_ptr->wc_end_cache_line = longest_path_ptr->end_cache_line; node_ptr->wc_beg_avail_time = longest_path_ptr->beg_avail_time; node_ptr->wc_end_avail_time = longest_path_ptr->end_avail_time; node_ptr->wc_fm_end_avail_time = longest_path_ptr->fm_end_avail_time; node_ptr->wc_finish_load = longest_path_ptr->finish_load; node_ptr->last_fm = longest_path_ptr->last_fm; node_ptr->wc_delay_after_fm_head = longest_path_ptr->delay_after_fm_head; for (i = 0; i < NUMREGS; ++i) { node_ptr->wc_src_needed[i] = union_src_needed[i]; node_ptr->wc_src_stage[i] = union_src_stage[i]; node_ptr->wc_release_time[i] = union_release_time[i]; } } if (verbose) printc ("CUMULATIVE EXTRA TIME %d\n", cum_extra_time); if (num_paths > max_iterations) node_ptr->extra_time = cum_extra_time; /* 5/25 */ if (naive_too) { exit_block_ptr -> worst_case -> pc_nai_time = total_pc_nai_time; exit_block_ptr -> worst_case -> pipe_nai_time = total_pipe_nai_time; } exit_block_ptr -> worst_case -> pipeline_time = total_time; exit_block_ptr -> worst_case -> abs_time = abs_total_time; exit_block_ptr -> worst_case -> extra_time = total_extra_time; /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ #ifdef __PARAMETRIC_TA__ printf("Storing poly in exit block as: "); pretty_print_list(node_ptr->polynomial_worst_case); printf("\n"); exit_block_ptr -> worst_case -> polynomial = node_ptr->polynomial_worst_case; exit_block_ptr -> worst_case -> need_polynomial = need_polynomial; #endif // __PARAMETRIC_TA__ /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ /*MARKED- NEW VARIABLE... THANKS TO FREQUENCY MODEL */ exit_block_ptr -> worst_case -> total_latency = total_latency; exit_block_ptr -> worst_case -> abs_total_latency = abs_total_latency; exit_block_ptr -> worst_case -> latency_considered = total_latency; exit_block_ptr -> worst_case -> icache_misses = total_icache; exit_block_ptr -> worst_case -> dcache_misses = total_dcache; exit_block_ptr -> worst_case -> overlap = total_overlap; exit_block_ptr -> worst_case -> diff = temp_diff; if (num_paths > max_iterations) exit_block_ptr->worst_case -> extra_time = cum_extra_time; /* 5/25 */ exit_block_ptr -> worst_case -> extra_fh_time = total_extra_fh_time; exit_block_ptr -> worst_case -> extra_fm_time = total_extra_fm_time; exit_block_ptr -> worst_case -> shared_fm_time = total_shared_fm_time; exit_block_ptr -> worst_case -> first_line_delay = total_first_line_delay; exit_block_ptr -> worst_case -> delay_after_fm = total_delay_after_fm; exit_block_ptr -> worst_case -> last_line_delay = total_last_line_delay; exit_block_ptr -> worst_case -> fm_dead_cycles = total_fm_dead_cycles; exit_block_ptr->worst_case->first_prog_offset = longest_path_ptr->first_prog_offset; exit_block_ptr->worst_case->beg_word_num = longest_path_ptr->beg_word_num; exit_block_ptr->worst_case->beg_cache_line = longest_path_ptr->beg_cache_line; exit_block_ptr->worst_case->end_cache_line = longest_path_ptr->end_cache_line; exit_block_ptr->worst_case->beg_avail_time = longest_path_ptr->beg_avail_time; exit_block_ptr->worst_case->end_avail_time = longest_path_ptr->end_avail_time; exit_block_ptr->worst_case->fm_end_avail_time = longest_path_ptr->fm_end_avail_time; exit_block_ptr->worst_case->finish_load = longest_path_ptr->finish_load; exit_block_ptr->worst_case->last_fm = longest_path_ptr->last_fm; exit_block_ptr->worst_case->delay_after_fm_head = longest_path_ptr->delay_after_fm_head; for (i = 0; i < NUMREGS; ++i) { exit_block_ptr->worst_case->src_needed[i] = union_src_needed[i]; exit_block_ptr->worst_case->src_stage[i] = union_src_stage[i]; exit_block_ptr->worst_case->release_time[i] = union_release_time[i]; } Union_Cache_List (&loop_fm_list, longest_path_ptr->worst_fm_list); Union_Cache_List (&loop_fh_list, (struct cache_block*)longest_path_ptr->worst_fh_list); /* keep the loop fm list in case this is somebody's child */ Copy_Cache_List (&exit_block_ptr->worst_case->fm_list, loop_fm_list); Union_Cache_List(&exit_block_ptr->worst_case->fm_list, cum_fm_list); Copy_Cache_List (&exit_block_ptr->worst_case->fh_list, loop_fh_list); Update_Cache_List (exit_block_ptr->worst_case->fm_list); Update_Cache_List (exit_block_ptr->worst_case->fh_list); } /* end of looking at one exit block */ } /***************************************************************************** * * FUNCTION NAME: Time_Best_Case * ******************************************************************************* * * M-SPEC * * Initialize min_base_cycles to MAXINT * Initialize min_1st_iter to MAXINT * Initialize min_base_path to NULL * * Clear fm cache list * Clear fh cache list * * if (min loop iterations > 1) max_iterations > 1 * { * for (every path) * look for min base path * * { * * * if (path_type == BACKEDGE) * { * clear cache lists * * path->min_base = Time_Path(fm = HIT, fh = MISS) * path->iter_1st = Time_Path(fm = MISS, fh = MISS) * path->iter_1st_fh = Time_Path(fm = MISS, fh = HIT) * * * If there are only 2 iterations, then need to consider * * * only the first iteration and an exit iteration. * * if (min_iter == 2) * { * if (path->1st_iter_fh < min_1st_iter) * { * * Timing tool uses the base and adjust values in * * * calculating timing values for nodes. Since there * * * are only 2 iterations, use the time with first hits* * * to identify the minimum paths, however use the time* * * without first hit effects to calculate the base and* * * adjust values. The algorithm for determining the * * * timing value for a child node should use the first * * * hit cache list to identify the presense of first * * * hits and to consider any effects from those first * * * hits. * min_1st_iter = path->1st_iter_fh * min_path = path * } * } * else * { * * know at this point that minimum iterations must be * * * greater than 2. * * * * * * QUESTION: suppose node has 3 iterations and first hit * * * effects can make another path have less time than the * * * chosen path. * * * * * IF a first hit has to be valid for every path, this * * * may not be a problem. * * * HINT: * * * time base path iteration * * * calculate 1st iteration * * * time 1st iteration with with fh effects * * * use some type of equation to determine if this is * * * actual best case * * * * if (path->min_base < min_base_cycles) * { * min_base_cycles = path->min_base * min_path = path * } * } * } * } * * save min_path->fm cache list to fm cache list * save min_path->fh cache list to fh cache list * * * now have determined the backedge path with the min base time * * * * Have calculated 3 times for each path * * * * need to calculate 2 times for the 1st iteration of the loop * * * One value will not include any effects from first hits. * * * This value can be used in the calculation of the adjust value. * * * The second value will include effects from first hits and * * * will be used to calculate the base time and best case time. * * * calculate 1st iteration * * * * * Clearing cache lists because for the 1st iteration the cache * * * lists should be empty. * * Clear cache list for first misses * Clear cache list for initial hits * time_1st_iteration = Time_Path( fm=MISS, fh=MISS) * time_1st_iteration_fh = Time_Path (fm=MISS, fh=HIT) * * Save fm cache to fm_list * Save fh cache to fh_list * * } * * if ( !(min_path) || (min_iter == 2)) * min_base_cycles = 0 * * * Now find the shortest exit path. This path could be different * * * from the backedge path (if timing a loop and a backedge was * * * taken.) If timing a function, then cache lists should be clear * * * and timing values = 0. Since exit path will be iterated only * * * once, use the min_exit_path_cycles_fmiss to identify the * * * min_exit_path. * * * * If a first miss is already encountered, it should be in the cache* * * and treated as a hit. * * * min_exit_path_cycles = MAXINT * exit_path_cycles_fhit = 0 * min_exit_path = NULL * * Clear temporary cache lists * * Copy master cache lists into cache lists * * for (every exit path) * { * path->cycles_fmiss = Time_Path(fm=MISS, fh = MISS) * path->cycles_fmiss_fh = Timpe_Path(fm = MISS< fh = HIT) * * if (min iterations > 1) * { * if (path->cycles_fmiss < min_exit_path_cycles) * { * min_exit_path_cycles = path->cycles_fmiss * min_exit_path = path * } * } * else * { * if (path->cycles_fmiss_fh < min_exit_path_cycles) * { * min_exit_path_cycles = path->cycles_fmiss_fh * min_exit_path = path * * } * } * } * * * at this point should have identified the exit path with the * * * minimum number of cycles. * * * * for the min exit path, calculate the number of cycles with first * * * misses are considered a hit. This will be used in the * * * calculation of the adjust value. Consider first hits as misses * * * because this result will be used in the calculation of the adjust* * * value. * min_exit_path_fhit = Time_Path(fm = HIT, fh = MISS) * * * Union the minimum exit path fm cache list with the fm_list * Union the minimum exit path fh cache list with the fh_list * * Calculate the base time * base_time = 0 * if (min_iterations > 1) * base_time = time_1st_iteration + * min_base_path_cycles * (min_iterations-2) + * min_exit_path->cycles_fmiss * * Calculate the adjust value * adj_value = (time_1st_iteration - min_base_path_cycles) + * (min_exit_path->cycles_fmiss - min_exit_path_fhit) * * Calculate the value for the best time. * best_case_time = time_1st_iteration_fh + * (min_base_path_cycles *(min_iteration -2)) + * min_exit_path_cycles *******************************************************************************/ void Time_Best_Case( struct inf_func_element* inf, struct tnode* node_ptr ) { int i, total_time, shortest_path_time, union_release_time[NUMREGS], union_src_needed[NUMREGS], stage, delay, total_extra_time, union_src_stage[NUMREGS], union_occ_cycles[NUM_STAGES], max_pipeline, node_occ_cycles[NUM_STAGES], total_nai_time, shortest_nai_path_time, min_iterations, old_total_time, abs_min_iterations, required_iterations, nonrequired_iterations, iterations_remaining, nonreq_iterations_to_do, iterations_to_do, shortest_pipe_nai_path_time, total_pipe_nai_time, prev_iter_done[NUM_STAGES], new_iter_start[NUM_STAGES], new_iter_done[NUM_STAGES], incr; struct cache_block *loop_fm_list, *tmp_ptr = NULL, *tmp_fh_ptr = NULL /* 1/17/96 */; struct union_list_node *beg_head[NUM_STAGES], *loop_beg_head[NUM_STAGES], *end_head[NUM_STAGES], *loop_end_head[NUM_STAGES], *beg_temp[NUM_STAGES], *end_temp[NUM_STAGES], *prev_end_head[NUM_STAGES]; struct loop_path_node *shortest_path; struct exit_block *exit_block_ptr = NULL; struct loop_path_node *path_ptr = NULL; struct range_node *range_ptr; min_iterations = node_ptr->loop_paths->min_iterations; abs_min_iterations = node_ptr->loop_paths->abs_min_iterations; #ifdef INTERFACE /* * To make user interface work, we need to assign values to certain * path numbers (5/31) took out useless stuff Nov. 7, 1998: * path_ptr->est.min_base (and _fmiss) were just set to 0 and never used. */ for (path_ptr = node_ptr->loop_paths->path; path_ptr; path_ptr = path_ptr->next) { path_ptr->min_base_fh.time = Time_Path(inf, node_ptr, path_ptr, &tmp_ptr, &tmp_fh_ptr, BEST, HIT, HIT, 0, 0, NO_INST, NO_INST); } #endif num_paths = set_path_numbers(node_ptr); iterations_handled = 0; printc ("entering TBC\n"); printc ("number of paths: %d\n", num_paths); printc ("number of iterations: %d\n", min_iterations); printc ("abs min # of iterations: %d\n", abs_min_iterations); if (! ignore_value) { find_compares (inf, node_ptr, BEST); check_exit (node_ptr); infeasible_exit(inf, node_ptr); infeasible_transition(inf, node_ptr); } min_iterations = node_ptr->loop_paths->min_iterations; abs_min_iterations = node_ptr->loop_paths->abs_min_iterations; /* * Copied from twc, 4/26/99 * For all continue paths, initialize the number of required and * nonrequired iterations based on the min & max iterations that * were determined by "value-based" constraints. Nov. 21, 1998 * Also, compute iterations_required as the sum of all the required * iterations of all the continue paths in this loop. */ required_iterations = 0; for (path_ptr = node_ptr->loop_paths->path; path_ptr; path_ptr = path_ptr->next) { if (path_ptr->path_type_mask == NONE) continue; path_ptr->required_iters_left = path_ptr->min_iter; path_ptr->nonrequired_iters_left = path_ptr->max_iter - path_ptr->min_iter; required_iterations += path_ptr->min_iter; } compute_ranges(node_ptr); /* something else we need 4/27/99 */ /* * If we are unable to get sets of ranges, then just make one range * encompassing all iters of the loop. 6/29/99 */ if (! node_ptr->range_list) { node_ptr->range_list = (struct range_node *) malloc (sizeof (struct range_node)); node_ptr->range_list->number = 1; node_ptr->range_list->path_mask = 0; for (path_ptr = node_ptr->loop_paths->path; path_ptr; path_ptr = path_ptr->next) { if (path_ptr->path_type_mask != NONE) { path_ptr->range_index = 1; node_ptr->range_list->path_mask |= 1 << path_ptr->number; } } node_ptr->range_list->min = 1; node_ptr->range_list->max = min_iterations; node_ptr->range_list->max_iter = min_iterations; node_ptr->range_list->next = NULL; } print_range_list(node_ptr); /* * If we are not performing value dependent constraint analysis, * set the above #iterations to default values. 4/20/99 */ if (ignore_value) { required_iterations = 0; for (path_ptr = node_ptr->loop_paths->path; path_ptr; path_ptr = path_ptr->next) { path_ptr->required_iters_left = 0; if (path_ptr->path_type_mask == (BACKEDGE | EXIT)) path_ptr->nonrequired_iters_left = min_iterations; else if (path_ptr->path_type_mask & BACKEDGE) path_ptr->nonrequired_iters_left = min_iterations - 1; else if (path_ptr->path_type_mask & EXIT) path_ptr->nonrequired_iters_left = 1; } } nonrequired_iterations = min_iterations - required_iterations; print_path_iters(node_ptr); printc ("%d required and %d nonrequrired iterations\n", required_iterations, nonrequired_iterations); if (naive_too) { node_ptr->bc_pc_nai_time = MAXINT; node_ptr->bc_pipe_nai_time = MAXINT; shortest_nai_path_time = MAXINT; shortest_pipe_nai_path_time = MAXINT; total_nai_time = 0; total_pipe_nai_time = 0; } node_ptr->bc_pipeline_time = MAXINT; shortest_path_time = MAXINT; shortest_path = NULL; total_time = 0; total_extra_time = 0; loop_fm_list = NULL; for (i = 0; i < NUMREGS; ++i) /* init union's register info */ { union_release_time[i] = NEVER; union_src_needed[i] = NEVER; union_src_stage[i] = NO_STAGE; } for (stage = 0; stage < NUM_STAGES; ++stage) { union_occ_cycles[stage] = MAXINT; /* we will want min of each path's */ node_ptr->occ_cycles[stage] = MAXINT; node_occ_cycles[stage] = 0; /* cum total of all iters' occupation */ beg_head[stage] = NULL; end_head[stage] = NULL; loop_beg_head[stage] = NULL; loop_end_head[stage] = NULL; } if (1 || min_iterations > 1) { /* find the longest pipeline time for the paths we will need below to create the union. This is not the same as the base_time, which assumes fm=hit. (9/24) */ max_pipeline = 0; tmp_ptr = NULL; tmp_fh_ptr = NULL; for (path_ptr = node_ptr -> loop_paths -> path; path_ptr; path_ptr = path_ptr -> next) { if (path_ptr->path_type_mask == NONE) continue; if (min_iterations > 1 && (path_ptr->path_type_mask & BACKEDGE) == 0) continue; if (min_iterations == 1 && (path_ptr->path_type_mask & EXIT) == 0) continue; tmp_ptr = NULL; tmp_fh_ptr = NULL; call_site = 'h'; // added by Sibin M ( oct 25, 2004 ) - why the hell didn't this // have the required number of args ? // I had to add the last "0" Time_Path (inf, node_ptr, path_ptr, &tmp_ptr, &tmp_fh_ptr, BEST, MISS, MISS, 0, 0, NO_INST, NO_INST, 0 ); poss_update (&max_pipeline, path_cycles, "update max_pipeline"); } /* * "Phase One": Find the shortest path P for the first iteration. * For this iteration only, we treat fm=miss and fh=hit. * If the number of non-required iterations for the loop > 0, then * the shortest path P is chosen from all continue paths; * else * P is chosen from those continue paths that have some required * iterations. * Note: we will still require P->set.maxiters > 0 when choosing * the shortest path. This "set" is a range of iterations * containing P's range. */ iterations_remaining = min_iterations; for (path_ptr = node_ptr->loop_paths->path; path_ptr; path_ptr = path_ptr->next) { /* * Don't forget to update how many iters remaining. 5/20/99 */ iterations_remaining = min_iterations - iterations_handled; if (min_iterations > 1 && (path_ptr->path_type_mask & BACKEDGE) == 0) continue; if (min_iterations == 1 && (path_ptr->path_type_mask & EXIT) == 0) continue; if (! ignore_value && iterations_remaining > required_iterations && ! (path_ptr->required_iters_left > 0 || path_ptr->nonrequired_iters_left > 0) ) continue; if (! ignore_value && iterations_remaining == required_iterations && ! (path_ptr->required_iters_left > 0) ) continue; if (! ignore_value) { range_ptr = find_paths_range (node_ptr, path_ptr); if (range_ptr->max_iter == 0) continue; } path_ptr->best_fm_list = NULL; tmp_fh_ptr = NULL; call_site = 'i'; // added by Sibin M ( oct 25, 2004 ) - why the hell didn't this // have the required number of args ? // I had to add the last "0" Time_Path (inf, node_ptr, path_ptr, &path_ptr->best_fm_list, &tmp_fh_ptr, BEST, MISS, HIT, 0, 0, NO_INST, NO_INST, 0); if (naive_too && path_ptr->bc_pc_nai_time < shortest_nai_path_time) shortest_nai_path_time = path_ptr->bc_pc_nai_time; if (naive_too && path_ptr->bc_pipe_nai_time < shortest_pipe_nai_path_time) shortest_pipe_nai_path_time = path_ptr->bc_pipe_nai_time; /* I think we need to store the updated fm list for every path on the first iteration, because we'll time all the paths again for the middle iterations. (6/7) */ if (path_cycles < shortest_path_time) { shortest_path_time = path_cycles; printc ("shortest time for iter # 1 is now %d\n", shortest_path_time); shortest_path = path_ptr; } /* ============================================================ */ /* create a union of paths including this newly timed one (6/3) */ /* update the union's occ cycle info: we want the largest possible amount of space per stage, hence least occupation (6/14) */ for (stage = 0; stage < NUM_STAGES; ++stage) { if (path_ptr->occ_cycles[stage] < union_occ_cycles[stage]) union_occ_cycles[stage] = path_ptr->occ_cycles[stage]; } /* update union's register info: notice that > replaces < because for best case we want later need times and earlier releases */ for (i = 0; i < NUMREGS; ++i) { if ( ( union_release_time[i] == NEVER && path_ptr->release_time[i] != NEVER ) || (path_ptr->release_time[i] > union_release_time[i]) ) { union_release_time[i] = path_ptr->release_time[i]; } if ( ( union_src_needed[i] == NEVER && path_ptr->src_needed[i] != NEVER ) || (path_ptr->src_needed[i] > union_src_needed[i]) ) { union_src_needed[i] = path_ptr->src_needed[i]; union_src_stage[i] = path_ptr->src_stage[i]; } } print_reg_info (union_src_needed, union_src_stage, union_release_time); /* before getting the end_head of the union, check whether this path is longer than max_pipeline; if so increment the cycles assoc with the end cycles of the union so far determined (9/25) */ if (path_cycles > max_pipeline) { for (stage = 0; stage < NUM_STAGES; ++stage) if (end_head[stage]) end_head[stage]->cycles += path_cycles - max_pipeline; max_pipeline = path_cycles; } /* note that in the comparisons below between the path's and the union's begnning and ending information, we only update the union if the path's respective number is GREATER, unlike in twc. */ for (stage = 0; stage < NUM_STAGES; ++stage) { if (beg_head[stage]) { if (path_ptr->beg_cycles[stage] > beg_head[stage]->cycles) beg_head[stage]->cycles = path_ptr->beg_cycles[stage]; } else if (path_ptr->beg_occupant[stage]) { memory_count += 12; beg_temp[stage] = (struct union_list_node *) malloc (sizeof (struct union_list_node)); beg_temp[stage]->occupant = path_ptr->beg_occupant[stage]; beg_temp[stage]->cycles = path_ptr->beg_cycles[stage]; beg_temp[stage]->next = beg_head[stage]; beg_head[stage] = beg_temp[stage]; } if (end_head[stage]) { if (path_cycles < max_pipeline) { if (path_ptr->end_cycles[stage] + (max_pipeline - path_cycles) > end_head[stage]->cycles) end_head[stage]->cycles = path_ptr->end_cycles[stage] + (max_pipeline - path_cycles); } else { if (path_ptr->end_cycles[stage] > end_head[stage]->cycles) end_head[stage]->cycles = path_ptr->end_cycles[stage]; } /* (9/19) Note that we are adding max_pipeline - path_cycles to the end cycles of the path for the stage to account for the possiblility that some paths are longer than others */ if (path_cycles < max_pipeline) printc ("stage %d: I had to adjust end cycles by %d\n", stage, max_pipeline - path_cycles); } else if (path_ptr->end_occupant[stage]) { memory_count += 12; end_temp[stage] = (struct union_list_node *) malloc (sizeof (struct union_list_node)); end_temp[stage]->occupant = path_ptr->end_occupant[stage]; if (path_cycles < max_pipeline) end_temp[stage]->cycles = path_ptr->end_cycles[stage] + (max_pipeline - path_cycles); else end_temp[stage]->cycles = path_ptr->end_cycles[stage]; if (path_cycles < max_pipeline) printc ("stage %d: I had to adjust end cycles by %d\n", stage, max_pipeline - path_cycles); end_temp[stage]->next = end_head[stage]; end_head[stage] = end_temp[stage]; } } printc ("Here is the path's union after 1 iteration of loop:\n"); print_union (beg_head, end_head); } /* end of timing all the continue paths on 1st iteration */ /* * At this point we have identified the shortest_path for the * first iteration. Decrement by 1 either the number of required * or nonrequired iterations, as appropriate, from this path, the * loops total as well as from the range set containing this path. * 4/27/99 */ if (shortest_path->required_iters_left > 0) { --shortest_path->required_iters_left; --required_iterations; printc ("path %d's required iterations decremented by one to %d\n", shortest_path->number, shortest_path->required_iters_left); printc ("loop's required iterations decremented by one to %d\n", required_iterations); } else { --shortest_path->nonrequired_iters_left; --nonrequired_iterations; printc ("path %d's nonreq iterations decremented by one to %d\n", shortest_path->number, shortest_path->nonrequired_iters_left); printc ("loop's nonreq iterations decremented by one to %d\n", nonrequired_iterations); } /* * reduce the max_iter of the range corresponding to the longest path */ if (! ignore_value) { for (range_ptr = node_ptr->range_list; range_ptr; range_ptr = range_ptr->next) if (range_ptr->number == shortest_path->range_index) break; --range_ptr->max_iter; printc ("about to reduce range %d by one to %d\n", range_ptr->number, range_ptr->max_iter); } if (naive_too) { total_nai_time = shortest_nai_path_time; total_pipe_nai_time = shortest_pipe_nai_path_time; } Union_Cache_List (&loop_fm_list, shortest_path->best_fm_list); total_time = shortest_path->path_cycles; total_extra_time = shortest_path->bc_extra_time; for (stage = 0; stage < NUM_STAGES; ++stage) { node_occ_cycles[stage] = union_occ_cycles[stage]; } printc ("After 1st iteration, naive time is %d\n", total_nai_time); printc (" and pipe nai time = %d\n", total_pipe_nai_time); printc ("After 1st iteration, shortest time is %d, extra time is %d\n", total_time, total_extra_time); print_array (union_occ_cycles, 0, NUM_STAGES, "node occ cycles"); /* the union becomes that of the loop - no data hazards between loop iterations yet because we've just seen 1 iteration */ for (stage = 0; stage < NUM_STAGES; ++stage) { loop_beg_head[stage] = beg_head[stage]; loop_end_head[stage] = end_head[stage]; } } /* end of 1st iteration of the loop */ iterations_handled = 1; /* The middle n-2 iterations: fm = hit and fh = miss */ /* * We need to reset the register union info, because a late register * need from first iteration based on first miss should be disregarded * now since that register is now needed sooner. 6/22/99 */ for (i = 0; i < NUMREGS; ++i) /* init union's register info */ { union_release_time[i] = NEVER; union_src_needed[i] = NEVER; union_src_stage[i] = NO_STAGE; } if (naive_too) { shortest_nai_path_time = MAXINT; shortest_pipe_nai_path_time = MAXINT; } shortest_path_time = MAXINT; shortest_path = NULL; for (stage = 0; stage < NUM_STAGES; ++stage) union_occ_cycles[stage] = MAXINT; /* reset for middle iter */ while (iterations_handled < min_iterations) { if (naive_too) { shortest_nai_path_time = MAXINT; shortest_pipe_nai_path_time = MAXINT; } shortest_path_time = MAXINT; shortest_path = NULL; for (stage = 0; stage < NUM_STAGES; ++stage) union_occ_cycles[stage] = MAXINT; /* reset for middle iter */ /* find the longest pipeline time for the paths we will need below to create the union. This is not the same as the base_time, which assumes fm=hit. (9/24) */ max_pipeline = 0; for (path_ptr = node_ptr -> loop_paths -> path; path_ptr; path_ptr = path_ptr -> next) { if ((path_ptr->path_type_mask & BACKEDGE) == 0) continue; call_site = 'j'; // added by Sibin M ( oct 25, 2004 ) - why the hell didn't this // have the required number of args ? // I had to add the last "0" Time_Path (inf, node_ptr, path_ptr, &tmp_ptr, &tmp_fh_ptr, BEST, MISS, MISS, 0, 0, NO_INST, NO_INST, 0); poss_update (&max_pipeline, path_cycles, "update max_pipeline"); } for (path_ptr = node_ptr->loop_paths->path; path_ptr; path_ptr = path_ptr->next) { /* * Don't forget to update how many iters remaining. 5/20/99 * Another note: for iterations after the first, we allow * both continue as well as exit paths. */ iterations_remaining = min_iterations - iterations_handled; if (path_ptr->path_type_mask == NONE) { printc ("path %d is infeasible, so continue\n", path_ptr->number); continue; } if (! ignore_value && iterations_remaining > required_iterations && ! (path_ptr->required_iters_left > 0 || path_ptr->nonrequired_iters_left > 0) ) { printc ("path %d has no iters to contribute, so continue\n", path_ptr->number); continue; } if (! ignore_value && iterations_remaining == required_iterations && ! (path_ptr->required_iters_left > 0) ) { printc ("path %d has no required iters, so continue\n", path_ptr->number); continue; } if (! ignore_value) { range_ptr = find_paths_range (node_ptr, path_ptr); if (range_ptr->max_iter == 0) { printc ("path %d's range's max iter is zero, so continue\n", path_ptr->number); continue; } } if (! ignore_value && infeasible_continue(node_ptr, path_ptr, BEST)) continue; tmp_fh_ptr = NULL; call_site = 'k'; // added by Sibin M ( oct 25, 2004 ) - why the hell didn't this // have the required number of args ? // I had to add the last "0" Time_Path (inf, node_ptr, path_ptr, &path_ptr->best_fm_list, &tmp_fh_ptr, BEST, HIT, MISS, 0, 0, NO_INST, NO_INST, 0); if (naive_too && path_ptr->bc_pc_nai_time < shortest_nai_path_time) shortest_nai_path_time = path_ptr->bc_pc_nai_time; if (naive_too && path_ptr->bc_pipe_nai_time < shortest_pipe_nai_path_time) shortest_pipe_nai_path_time = path_ptr->bc_pipe_nai_time; if (path_cycles < shortest_path_time) /* don't include extra time */ { shortest_path_time = path_cycles; printc ("shortest time for middle iter is now %d\n", shortest_path_time); shortest_path = path_ptr; } /* ======================================================= */ /* create a union of paths including this newly timed one */ /* update union's occupied cycles */ for (stage = 0; stage < NUM_STAGES; ++stage) { if (path_ptr->occ_cycles[stage] < union_occ_cycles[stage]) union_occ_cycles[stage] = path_ptr->occ_cycles[stage]; } /* update union's register info */ for (i = 0; i < NUMREGS; ++i) { if ( ( union_release_time[i] == NEVER && path_ptr->release_time[i] != NEVER ) || (path_ptr->release_time[i] > union_release_time[i]) ) { union_release_time[i] = path_ptr->release_time[i]; } if ( ( union_src_needed[i] == NEVER && path_ptr->src_needed[i] != NEVER ) || (path_ptr->src_needed[i] > union_src_needed[i]) ) { union_src_needed[i] = path_ptr->src_needed[i]; union_src_stage[i] = path_ptr->src_stage[i]; } } print_reg_info (union_src_needed, union_src_stage, union_release_time); /* before getting the end_head of the union, check whether this path is longer than max_pipeline; if so increment the cycles assoc with the end cycles of the union so far determined (9/25) */ if (path_cycles > max_pipeline) { for (stage = 0; stage < NUM_STAGES; ++stage) if (end_head[stage]) end_head[stage]->cycles += path_cycles - max_pipeline; max_pipeline = path_cycles; } for (stage = 0; stage < NUM_STAGES; ++stage) { if (beg_head[stage]) { if (path_ptr->beg_cycles[stage] > beg_head[stage]->cycles) beg_head[stage]->cycles = path_ptr->beg_cycles[stage]; } else if (path_ptr->beg_occupant[stage]) { memory_count += 12; beg_temp[stage] = (struct union_list_node *) malloc (sizeof (struct union_list_node)); beg_temp[stage]->occupant = path_ptr->beg_occupant[stage]; beg_temp[stage]->cycles = path_ptr->beg_cycles[stage]; beg_temp[stage]->next = beg_head[stage]; beg_head[stage] = beg_temp[stage]; } if (end_head[stage]) { if (path_cycles < max_pipeline) { if (path_ptr->end_cycles[stage] + (max_pipeline - path_cycles) > end_head[stage]->cycles) end_head[stage]->cycles = path_ptr->end_cycles[stage] + (max_pipeline - path_cycles); } else { if (path_ptr->end_cycles[stage] > end_head[stage]->cycles) end_head[stage]->cycles = path_ptr->end_cycles[stage]; } /* (9/19) Note that we are adding base_time - path_cycles to the end cycles of the path for the stage to account for the possiblility that some paths are longer than others */ if (path_cycles < max_pipeline) printc ("stage %d: I had to adjust end cycles by %d\n", stage, max_pipeline - path_cycles); } else if (path_ptr->end_occupant[stage]) { memory_count += 12; end_temp[stage] = (struct union_list_node *) malloc (sizeof (struct union_list_node)); end_temp[stage]->occupant = path_ptr->end_occupant[stage]; if (path_cycles < max_pipeline) end_temp[stage]->cycles = path_ptr->end_cycles[stage] + (max_pipeline - path_cycles); else end_temp[stage]->cycles = path_ptr->end_cycles[stage]; if (path_cycles < max_pipeline) printc ("stage %d: I had to adjust end cycles by %d\n", stage, max_pipeline - path_cycles); end_temp[stage]->next = end_head[stage]; end_head[stage] = end_temp[stage]; } } printc ("Here is the path's union:\n"); print_union (beg_head, end_head); } /* end of timing all the continue paths */ delay = 0; find_reg_delay (&delay, union_release_time, union_src_needed); printc ("delay between iterations of loop is %d\n", delay); shortest_path_time += delay; /* now copy the path's end_head to loop_end_head */ for (stage = 0; stage < NUM_STAGES; ++stage) loop_end_head[stage] = end_head[stage]; Union_Cache_List (&loop_fm_list, shortest_path->best_fm_list); /* * Next, we find the number of iterations to do for this shortest path. * Essential calculation for phase two of best case. 4/27/99 */ if (! ignore_value) { for (range_ptr = node_ptr->range_list; range_ptr; range_ptr = range_ptr->next) if (range_ptr->number == shortest_path->range_index) break; nonreq_iterations_to_do = Min (Min (nonrequired_iterations, shortest_path->nonrequired_iters_left), range_ptr->max_iter - shortest_path->required_iters_left); iterations_to_do = shortest_path->required_iters_left + nonreq_iterations_to_do; printc ("nonreq iters to do = %d\n", nonreq_iterations_to_do); printc ("iterations to do = %d\n", iterations_to_do); } else { nonreq_iterations_to_do = min_iterations - 1; iterations_to_do = min_iterations - 1; } required_iterations -= shortest_path->required_iters_left; printc ("reducing loop's req iters by %d to %d\n", shortest_path->required_iters_left, required_iterations); shortest_path->required_iters_left = 0; if (! ignore_value) { range_ptr->max_iter -= iterations_to_do; printc ("corresponding range's max iter reduced by %d to %d\n", iterations_to_do, range_ptr->max_iter); } shortest_path->nonrequired_iters_left -= nonreq_iterations_to_do; iterations_handled += iterations_to_do; printc ("path %d's nonreq iters reduced by %d to %d\n", shortest_path->number, nonreq_iterations_to_do, shortest_path->nonrequired_iters_left); printc ("iterations handled incremented by %d to %d\n", iterations_to_do, iterations_handled); if (naive_too) { total_nai_time += iterations_to_do * shortest_nai_path_time; total_pipe_nai_time += iterations_to_do * shortest_pipe_nai_path_time; } for (i = 0; i < NUM_STAGES; ++i) prev_end_head[i] = end_head[i]; for (i = 0; i < NUM_STAGES; ++i) { if (! prev_end_head[i]) prev_iter_done[i] = 0; else prev_iter_done[i] = total_time - prev_end_head[i]->cycles + 1; new_iter_start[i] = shortest_path->beg_cycles[i]; if (shortest_path->end_cycles[i] == 0) new_iter_done[i] = 0; else new_iter_done[i] = shortest_path->path_cycles - shortest_path->end_cycles[i] + 1; } printc ("prev total time = %d\n", total_time); printc ("new iter time = %d\n", shortest_path->path_cycles); print_array (prev_iter_done, 0, NUM_STAGES, "prev iter done"); print_array (new_iter_start, 0, NUM_STAGES, "new iter start"); print_array (new_iter_done, 0, NUM_STAGES, "new iter done "); incr = incr_new_total_time (total_time, prev_iter_done, new_iter_start, new_iter_done); if (only_fetch || cache_only) total_time += iterations_to_do * (shortest_path->path_cycles + delay); else /* total_time += iterations_to_do * incr; */ total_time += iterations_to_do * (shortest_path->path_cycles + delay - 4); printc ("shortest time is %d\n", total_time); printc ("Here is the loop's union just before the exit iteration.\n"); print_union (loop_beg_head, loop_end_head); /* update node_occ_cycles (6/14) */ for (stage = 0; stage < NUM_STAGES; ++stage) { node_occ_cycles[stage] += union_occ_cycles[stage] * iterations_to_do; } } /* end of "if min_iterations > 2" */ printc ("before last iter, total time = %d, extra time = %d\n", total_time, total_extra_time * (min_iterations - 1)); total_time -= total_extra_time * (min_iterations - 1); print_array (node_occ_cycles, 0, NUM_STAGES, "node occ cycles"); /* ====================== */ /* now for the exit paths */ old_total_time = total_time; for (exit_block_ptr = node_ptr->exit_list; exit_block_ptr; exit_block_ptr = exit_block_ptr->next) { /* store loop's union in exit block pointer and node ptr */ for (stage = 0; stage < NUM_STAGES; ++stage) { exit_block_ptr->best_case->occ_cycles[stage] = node_occ_cycles[stage] + union_occ_cycles[stage]; if (node_occ_cycles[stage] + union_occ_cycles[stage] < node_ptr->occ_cycles[stage]) { node_ptr->occ_cycles[stage] = node_occ_cycles[stage] + union_occ_cycles[stage]; } exit_block_ptr->best_case->beg_head[stage] = loop_beg_head[stage]; exit_block_ptr->best_case->end_head[stage] = loop_end_head[stage]; node_ptr -> bc_beg_head[stage] = loop_beg_head[stage]; node_ptr -> bc_end_head[stage] = loop_end_head[stage]; } /* the NODE needs the best time; the exit block needs all times */ if (naive_too && total_nai_time < node_ptr->bc_pc_nai_time) node_ptr->bc_pc_nai_time = total_nai_time; if (naive_too && total_pipe_nai_time < node_ptr->bc_pipe_nai_time) node_ptr->bc_pipe_nai_time = total_pipe_nai_time; if (total_time < node_ptr->bc_pipeline_time) { node_ptr->bc_pipeline_time = total_time; node_ptr->bc_extra_time = total_extra_time; for (i = 0; i < NUMREGS; ++i) { node_ptr->bc_src_needed[i] = union_src_needed[i]; node_ptr->bc_src_stage[i] = union_src_stage[i]; node_ptr->bc_release_time[i] = union_release_time[i]; } } if (naive_too) { exit_block_ptr -> best_case -> pc_nai_time = total_nai_time; exit_block_ptr->best_case->pipe_nai_time = total_pipe_nai_time; } exit_block_ptr -> best_case -> pipeline_time = total_time; exit_block_ptr -> best_case -> extra_time = total_extra_time; Copy_Cache_List (&exit_block_ptr->best_case->fm_list, loop_fm_list); Update_Cache_List (exit_block_ptr->best_case->fm_list); for (i = 0; i < NUMREGS; ++i) { exit_block_ptr->best_case->src_needed[i] = union_src_needed[i]; exit_block_ptr->best_case->src_stage[i] = union_src_stage[i]; exit_block_ptr->best_case->release_time[i] = union_release_time[i]; } } /* end of "for each exit block" */ print_array (node_ptr->occ_cycles, 0, NUM_STAGES, "node occ cycles"); } /***************************************************************************** * * FUNCTION NAME: Time_Path * ******************************************************************************* * * M-SPEC * * algorithm * * INPUTS : * tree node -- represents the node to time * timing case -- indicates best case or worst case timing * fm_status -- indicates whether or not should consider a 'fm' * a hit or a miss * * OUTPUT: integer representing the number of cycles required to * * ACCESS: * 1. the list of instructions contained in the node from the stats file * 2. the list of blocks that make up each possible path thru the node * 3. the list of instructions in each block from the inf file * * 1. access the loop node that represents all the possible paths thru the * node. note that the path is represented as a list of blocks * 2. for each path, time each block in the path * 3. return the running sum of the number of required cycles. * 4. assume that all child nodes have been timed * * ASSUMPTIONS: * 1. Assume that each loop (actual loop in the tree) will have only one * instance. * 2. Assume that if a block in a path is determined to be representing a * loop, that loop will appear as a child to the current tree node * (pointed to by the pointer node_ptr). * 3. If path analysis is timing a complete path, then the beg_block and * end_block values are completely ignored. * * * 4. It is assumed that when adding the frequency model, some of the times * are not affected by the fact that we are calculating ideal number of * cycles in path_cycles. What works for ideal will be the same for non- * ideal. This is an assumption already prevelant, since it is assumed that * the instr cache latency can be added to path_cycles. Remind me to thank * the simple pipeline. *******************************************************************************/ /*NOTE- IN THIS FUNCTION WHEREEVER THE PARAMETERIZE VARIABLE IS SEEN, IT IS ADDED TO INTRODUCE THE FREQUENCY MODEL */ /* Modified by Sibin M on Oct 25, 2004. * Process of cleaning up the TA for C++ * * Standard C++-style function definition. * The args are : * * * struct inf_func_element *inf; * struct tnode *node_ptr; * struct loop_path_node *path_ptr; * struct cache_block **fm_list; * struct cache_block **fh_list; list categories contain fh transit. * enum time_case tcase; * enum cache_access fm_status; consider first miss to be hit/miss * enum cache_access fh_status; consider first hit to be hit/miss * int beg_block; 1st block in path to start timing * int end_block; last block in path to time * int beg_inst; 1st inst in beg_block to time * int end_inst; last inst in end_block to time * int data_status; consider data access to be hit/miss * */ int Time_Path( struct inf_func_element* inf, struct tnode* node_ptr, struct loop_path_node* path_ptr, struct cache_block** fm_list, struct cache_block** fh_list, enum time_case tcase, enum cache_access fm_status, enum cache_access fh_status, int beg_block, int end_block, int beg_inst, int end_inst, int data_status ) { struct cache_block *cnode_fm_list = NULL; /* fm list from child node */ struct cache_block *cnode_fh_list = NULL; int exit_bnum; /* exit block number */ struct block_list *active_fh_list = NULL; /* list 1st hits already encountered */ struct exit_block *exit_bptr = NULL; /* exit block pointer */ struct loop_block_list *bptr = NULL; /* block pointer */ struct block_element *inf_bptr; /* inf block pointer */ struct tnode *child_loop = NULL; struct tnode *called_func; struct union_list_node *p, *inner_beg_head[NUM_STAGES], *inner_end_head[NUM_STAGES], *first_inner[NUM_STAGES], *last_inner[NUM_STAGES]; struct instruct *inf_inst_ptr, *instruct_ptr; struct inst_cat *stats_inst_ptr, *inst_cat_ptr; row *begin_use, *end_use; string *dest_reg, reg; struct block_list *blk_list_ptr; extern int inst_end_cycles [CARD_INST_SET][NUM_DATASIZES][NUM_STAGES][NUM_CASES]; int old_path_cycles, i, j,k,l, cycle_diff, num_path_inst, cycle, inst, cc_set, someone_fetched, tot_path_inst, next, prev, diff, new_path_cycles, delay, inst_after_loop = 0, all_empty, stage, size, this_inst_end_cycles[NUM_STAGES], this_inst_beg_cycles[NUM_STAGES], inst_after_func = NO_FUNC, fut_trans, delayed_register, inner_new_miss2hit, inner_still_somemiss, release_time[NUMREGS], src_needed[NUMREGS], smallest, inner_release_time[NUMREGS], inner_extra_time = 0, subpath_started, subpath_finished, src_stage[NUMREGS], extra_nonoverlap, inner_src_stage[NUMREGS], earliest_start[NUM_STAGES], block_inst, child_done[NUM_STAGES], inner_pipeline_time, inst_done[NUM_STAGES], inner_occ_cycles[NUM_STAGES], inner_vacant_cycles[NUM_STAGES], word_num, next_word_num, curr_time, last_word_loaded = 0, extra_delay = 0, cache_line = -1, prev_cache_line = -1, jumping = NO, inner_end_cache_line = -1, *inner_end_avail_time, inner_beg_cache_line, max_inner_end_avail_time = 0, inner_first_line_delay = 0, i2i_delay = 0, inner_delay_after_fm = 0, fm_inst, inner_last_fm, last_fm_f2f, *inner_beg_word_num, temp, *fm_avail_time, inner_last_line_delay = 0, fm_last_word_loaded = 0, *inner_fm_end_avail_time, pred_blk_num, found, max_inner_fm_end_avail_time = 0, pred_inst_num, num_preheader_blks, backward_num, miss_word, curr_word, pred_inst_cat, *order_of_fill, already_calc_first_line_delay, *inst_word_list, *fh_order_of_fill, i2f_jump_delay = 0, inner_fm_dead_cycles = 0, pending_dead_cycles, *path_pipeline, *pipeline_cycles, *dest_ready, *stage_src1_needed, *stage_src2_needed, *stage_src3_needed, *inst_cond_code, *inst_data_type, inner_finish_load, done[NUM_STAGES], inst2annul = NO_INST, inner_shared_fm_time, inner_extra_fm_time; int extra_latency = 0, icache_misses = 0, dcache_misses = 0; struct delay_after_fm_node *delay_after_fm_head = NULL, *delay_after_fm_temp = NULL, *inner_delay_after_fm_head, *inner_delay_after_fm_temp; bool in_cache, same_cache_line, annulled_branch_found = false, annul_delay_slot = false, assess_penalty, count_on_side, prev_Mispredicted_branch = false, prev_MEM_Miss = false, take_branch = false, done_once = false, inst_after_child_loop = false, temp_cond = false; struct block_element *prev_block_inf_ptr, *prev_inf_block_ptr, *blk_ele_ptr; struct loop_block_list *prev_block_ptr; struct block_list *block_ptr, *pred_blocklist; struct inst_cat *inst_ptr2, *pblock_linst_ptr, *new_ptr; int pblock_linst, max_inst_num; string loop_number, loop_name; struct loop_element *inf_loop_ptr, *loop_ele_ptr; struct tnode *cnode, *tnode_ptr; struct cycle_jump_node *cycle_jump, *cycle_jump_head = NULL; extern char pipe_diagram[]; char tmp[80]; struct inst_num_node *inst_num_head = NULL, *inst_num_tail = NULL, *inst_num_temp, *inst_num_ptr; struct instr_history *history; /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ #ifdef __PARAMETRIC_TA__ struct term *tmp_term = NULL; int child_overcount_time = 0 ; #endif // __PARAMETRIC_TA__ /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ // extern bool check_update_variable_cycles( int, int[], int, int ) ; /* 1. initialize path's begin and end arrays - made global vars (2/20) */ ++invocation_num; if (verbose) { printc ("path %d in iteration # %d (invocation #%d) call site %c\n", path_ptr->number, iterations_handled + 1, invocation_num, call_site); printc ("Looking at %s case\n", (tcase == BEST) ? "best" : "worst"); printc ("fm_list at beginning of Time_Path\n"); Print_Cache_List(*fm_list); } #ifdef __SIB_DEBUG__ if( sib_verbose ) { /* printc( "\n---------------------------------------\n" ) ; printc( "Enter : Time_Path()\n" ) ; printc( "Invocation Number : %d\n", invocation_num ) ; printc( "iterations_handled = %d \t max_iterations = %d \n", iterations_handled, max_iterations ) ; printc( "---------------------------------------\n" ) ; */ } #endif // __SIB_DEBUG #ifdef INTERFACE pipe_diagram[0] = '\0'; #endif /*FIRST INITIALIZE ALL VRIABLES*/ for (i = 0; i < NUMREGS; i++) { src_needed[i] = NEVER; release_time[i] = NEVER; path_ptr->src_needed[i] = NEVER; path_ptr->release_time[i] = NEVER; path_ptr->src_stage[i] = NO_STAGE; src_stage[i] = NO_STAGE; } for (i = 0; i < NUM_STAGES; i++) { done[i] = 0; /* should have done before (8/29/96) */ path_beg_cycles[i] = 0; path_end_cycles[i] = 0; beg_occupant[i] = NULL; end_occupant[i] = NULL; path_ptr->occ_cycles[i] = 0; } path_cycles = 0; path_ptr->extra_fh_time = 0; path_ptr->extra_fm_time = 0; path_ptr->shared_fm_time = 0; path_ptr->bc_extra_time = 0; path_ptr->dcache_misses = 0; path_ptr->icache_misses = 0; path_ptr->extra_latency = 0; path_ptr->latency_considered = 0; path_ptr->total_latency = 0; path_ptr->overlap = 0; if (naive_too) { path_ptr->bc_pc_nai_time = 0; path_ptr->wc_pc_nai_time = 0; path_ptr->bc_pipe_nai_time = 0; path_ptr->wc_pipe_nai_time = 0; } path_ptr->beg_cache_line = -1; path_ptr->end_cache_line = -1; memory_count += 4 * words_per_line * sizeof (int); path_ptr->beg_avail_time = (int *) calloc (words_per_line, sizeof (int)); path_ptr->end_avail_time = (int *) calloc (words_per_line, sizeof (int)); path_ptr->fm_end_avail_time = (int *) calloc (words_per_line, sizeof (int)); path_ptr->beg_word_num = (int *) calloc (words_per_line, sizeof (int)); path_ptr->first_line_delay = 0; path_ptr->last_line_delay = 0; path_ptr->delay_after_fm = 0; path_ptr->fm_dead_cycles = 0; inner_ff_trans = 0; inner_extra_cycles = 0; path_ptr->fm_encountered = 0; path_ptr->fh_encountered = 0; extra_nonoverlap = 0; memory_count += 4 * words_per_line * sizeof (int); avail_time = (int *) calloc (words_per_line, sizeof (int)); fm_avail_time = (int *) calloc (words_per_line, sizeof (int)); inner_end_avail_time = (int *) calloc (words_per_line, sizeof (int)); inner_fm_end_avail_time = (int *) calloc (words_per_line, sizeof (int)); memory_count += 12; inner_delay_after_fm_head = (struct delay_after_fm_node *) malloc (sizeof (struct delay_after_fm_node)); for (i = 0; i < words_per_line; ++i) { path_ptr->beg_avail_time[i] = 0; path_ptr->end_avail_time[i] = 0; path_ptr->fm_end_avail_time[i] = 0; path_ptr->beg_word_num[i] = -1; avail_time[i] = 0; inner_end_avail_time[i] = 0; inner_fm_end_avail_time[i] = 0; } /* 2. count how many instructions are in the path so that we can contiguously allocate some arrays which we use only for this path */ /* make a l.l. of instruction numbers so I can tell if the inst after delay slot of annulled branch is the target or fall thru inst 6/27/96 */ /*COUNTING INSTRUCTIONS IN THE PATH TO INITIALIZE THE DYNAMIC VARIABLES*/ tot_path_inst = 0; for (bptr = path_ptr->path_list; bptr; bptr = bptr->next) { /*LOOK AT path->path_list TO FIND EACH BLOCK IN PATH*/ inf_bptr = Locate_Inf_Block (inf, bptr -> block_number); block_inst = 0; for (inf_inst_ptr = inf_bptr -> instruct_list; inf_inst_ptr; inf_inst_ptr = inf_inst_ptr -> next) { /*LOOK AT THE INSTRUCTION LIST IN THE BLOCKS, EACH ONE MUST HAVE A CORRESPONDING INSTR IN THE STATS INSTRUCTION LIST */ stats_inst_ptr = Locate_Stats_Inst (node_ptr -> instruct_list, inf_inst_ptr -> number); if (stats_inst_ptr == NULL) { break; } ++tot_path_inst; inf_inst_ptr->block_inst = block_inst++; memory_count += 12; inst_num_temp = (struct inst_num_node *) malloc (sizeof (struct inst_num_node)); inst_num_temp->num = inf_inst_ptr->number; if (tot_path_inst == 1) { inst_num_head = inst_num_temp; inst_num_tail = inst_num_temp; inst_num_temp->next = NULL; inst_num_temp->prev = NULL; } else { inst_num_temp->next = NULL; inst_num_temp->prev = inst_num_tail; inst_num_tail->next = inst_num_temp; inst_num_tail = inst_num_temp; } } } if (verbose) { print_inst_num_list (inst_num_head); } /*IMPORTANT COMMENT */ /* path_pipeline: array of instruction numbers (NOT always consecutive) pipeline_cycles: only used in part where I am getting ready to display pipeline diagram and need to know about cycles during which no inst in the path is in any stage (jumping). This is an array of path_cycles after each inst in path. We need these numbers because an inst may complete in the ID, FWB, WB or CA and referring to this is simpler than checking which of the stages is the last. begin_use and end_use: when each inst in path begins/ends each stage dest_reg: name of destination register, a string dest_ready: the stage number in which an inst's destination is available stage_src1_needed, stage_src2_needed: stage number in which inst's operand reg is needed before proceeding inst_cond_code: stores whether this inst sets a cond code or uses a cond code (being a branch inst) or neither inst_data_type: stores whether this inst is a flop or integer inst */ if (tot_path_inst == 0) { error ("Error: No instructions in path."); } if (compute_pd) { memory_count += 2 * tot_path_inst * sizeof (int); path_pipeline = (int *) calloc (tot_path_inst, sizeof (int)); pipeline_cycles = (int *) calloc (tot_path_inst, sizeof (int)); } memory_count += tot_path_inst * (2 * sizeof (row) + sizeof (string) + 5 * sizeof (int)); begin_use = (row *) calloc (tot_path_inst, sizeof (row)); end_use = (row *) calloc (tot_path_inst, sizeof (row)); dest_reg = (string *) calloc (tot_path_inst, sizeof (string)); dest_ready = (int *) calloc (tot_path_inst, sizeof (int)); stage_src1_needed = (int *) calloc (tot_path_inst, sizeof (int)); stage_src2_needed = (int *) calloc (tot_path_inst, sizeof (int)); stage_src3_needed = (int *) calloc (tot_path_inst, sizeof (int)); inst_cond_code = (int *) calloc (tot_path_inst, sizeof (int)); inst_data_type = (int *) calloc (tot_path_inst, sizeof (int)); /*BOOKMARK- adding the new history array */ memory_count += tot_path_inst *sizeof (struct instr_history); history = (struct instr_history *) malloc (tot_path_inst * sizeof (struct instr_history )); for (i=0 ; i path_list; bptr; bptr = bptr->next) { /*KEEP TRACK OF SUBPATH IF ENABLED*/ if (beg_block != 0 && bptr->block_number != beg_block && subpath_started == NO) { continue; } if (beg_block != 0 && bptr->block_number == beg_block) { subpath_started = YES; } if (end_block != 0 && bptr->block_number != end_block && subpath_finished == YES) { break; } if (end_block != 0 && bptr->block_number == end_block) { subpath_finished = YES; } /* 4. Process each instruction in this path. */ /*AGAIN FIND THE INF BLOCK FOR CURRENT BLOCK NUMBER */ inf_bptr = Locate_Inf_Block (inf, bptr -> block_number); for (inf_inst_ptr = inf_bptr -> instruct_list; inf_inst_ptr; inf_inst_ptr = inf_inst_ptr -> next) { /*INSIDE THE INSTRUCTION LIST */ #ifdef __SIB_DEBUG__ if( sib_verbose ) { /* printc( " --- \n" ) ; printc( "Time_Path::inside FOR loop for instructions in basic block.\n" ) ; printc( "Timing for instruction : %s\n", inf_inst_ptr->inst ) ; printc( "Instruction Type : %d \t Cond Code : %d \n", inf_inst_ptr->inst_type ) ; printc( "Called Function : %s \n", inf_inst_ptr->called_func ) ; printc( "path_cycles = %d \n", path_cycles ) ; printc( " --- \n\n" ) ; */ } #endif // __SIB_DEBUG__ /*CHECK IF INSTRUCTION READ IS BOGUS ACCORDING TO "OPCODE" FILE*/ if (inf_inst_ptr->inst_type >= CARD_INST_SET) { printf ("Time_Path(): inst_type %d is too big for inst %d in %s", inf_inst_ptr->inst_type, inf_inst_ptr->number, node_ptr->name); exit(1); } /*LOCATE THE CATEGORY COUNTERPART OF CURRENT INSTRUCTION */ stats_inst_ptr = Locate_Stats_Inst (node_ptr -> instruct_list, inf_inst_ptr -> number); /*IF THE OBTAINED COUNTERPART EXISTS, CHECK IF THE SUBPATH HAS STARTED OR FINISHED.. IF ENABLED */ if (stats_inst_ptr) { /*If stats_inst_ptr is false, it indicates that we have reached a loop or a function */ if (beg_inst != NO_INST && bptr->block_number == beg_block && inf_inst_ptr->block_inst < beg_inst) { continue; /* subpath hasn't started yet */ } if (end_inst != NO_INST && bptr->block_number == end_block && inf_inst_ptr->block_inst > end_inst) { break; /* subpath has finished */ } /*INCREMENT THE INSTRUCTION NUMBER TO INDICATE THE NEW INSTRUCTION BEING SERVICED */ ++num_path_inst; /*IF PREVIOUS INSTRUCTION WAS A FUNCTION CALL... INSERT FUNCTION BY INSERTED FUNCTION WE MEAN THE FOLOWING, THE CALLED FUNCTION IS TREATED AS A CHILD NODE AND ITS INSTRUCTIONS ARE NOT IN THE LL. THE FUNCTION WILL BE TIMED BEFORE ITS PARENT NODE. SO WHEN WE REACH A FUNCTION CALL, WE SET A VARIABLE REMINDING USE TO ADD FUNCTION INFO AFTER THE CALL INSTRUCTION. SO WE INSERT FUNCTION INFORMATION */ if (inst_after_func == FUNC_WAIT) { inst_after_func = INSERT_FUNC; } } else { /*IF THE INSTRUCTION WAS NOT FOUND IN THE STATS LIST, IT WAS MOVED TO LOOP OR FUNCTION... FIND THAT LOOP OR FUNCTION */ child_loop = Locate_Child_Loop_Header (bptr->block_number, node_ptr); if (child_loop == NULL) continue; /* cure for segv 2/28 */ /* * Now we find what the exit block number for the child loop should be, * based on information stored with this path. * When the child loop is done, we either resume with the next * block in this path, or if this is a back edge we go to the * beginning of the path for the next iteration of the outer loop * (containing this current path as an iteration). But if this * is an exit path, and the child loop is the last block in the * last iteration of the outer loop containing this path, * we still need to find the exit block number to match. * This latter situation can occur when the child loop has a * goto that exits 2 or more loops at once. 1/28/97 */ /*CHECKING THE EXIT BLOCK OF THE CHILD LOOP */ if (bptr->next) exit_bnum = bptr->next->block_number; else if (path_ptr -> path_type_mask & BACKEDGE) exit_bnum = path_ptr -> path_list -> block_number; else if (path_ptr -> path_type_mask & EXIT) exit_bnum = path_ptr -> exit_list -> block_number; for (exit_bptr = child_loop -> exit_list; exit_bptr; exit_bptr = exit_bptr -> next) { if (exit_bptr -> block_number == exit_bnum) break; } if (exit_bptr == NULL) { error ("exit block pointer unexpectedly NULL"); } /* |||||||||||||||||||||||||||||||||||||||||||||||||||||| */ /* We also need a child first hit list (4/28) */ /* This is a similar situation to called functions below. */ /*NOW GETTING CACHE LISTS FROM THE CHILD LOOP */ if (tcase == WORST) Copy_Cache_List (&cnode_fh_list, exit_bptr->worst_case->fh_list); else Copy_Cache_List (&cnode_fh_list, exit_bptr->best_case->fh_list); Update_Cache_List(cnode_fh_list); inner_still_somemiss = Determine_Num_Still_SomeMiss(*fh_list, cnode_fh_list, tcase); if (verbose) printc ("# of inner loop f or m that are still some miss = %d\n", inner_still_somemiss); /* add extra time as a result of inner_still_somemiss in step 3b */ inner_new_miss2hit = Determine_Num_New_Miss2Hit(*fh_list, cnode_fh_list, tcase); /* now, update the fh_list: */ if (tcase == WORST) Union_Cache_List(fh_list, exit_bptr->worst_case->fh_list); else Union_Cache_List(fh_list, exit_bptr->best_case->fh_list); if (verbose) printc ("# of inner loop f or m that are new first hits = %d\n", inner_new_miss2hit); /* add time to path_cycles from inner_new_miss2hit in step 3b */ /* ||||||||||||||||||||||||||||||||||||||||||||||||||||||| */ /* as with a function, we need to look for f->f transitions (4/18)*/ if (tcase == WORST) Copy_Cache_List(&cnode_fm_list, exit_bptr->worst_case->fm_list); else Copy_Cache_List(&cnode_fm_list, exit_bptr->best_case->fm_list); Update_Cache_List(cnode_fm_list); inner_ff_trans = Determine_Num_FirstMiss_FM_Trans (*fm_list, cnode_fm_list, tcase, &fut_trans); path_ptr->fm_encountered += inner_ff_trans; if (verbose) printc ("about to call Union_Cache_List for the fm_list\n"); if (tcase == WORST) Union_Cache_List(fm_list, exit_bptr->worst_case->fm_list); else Union_Cache_List(fm_list, exit_bptr->best_case->fm_list); if (verbose) { printc ("I have encountered %d f->f transitions so far.\n", inner_ff_trans); printc ("I have also seen %d future f-f transitions.\n", fut_trans); } } /* end of else from checking stats_inst_ptr */ /* 5. We need to address a possible data hazard. Let A be the inst just before the inner loop, and let B be the 1st inst in the inner loop. I assume that between A and B there can be at most one kind of data hazard. a) B is a typical int/flop, not a load or store, and needs a register operand in order to execute. If A's destination is (either of) B's src operands, B leaves ID when A finishes EX or FPEX. (shouldn't stall) b) On the other hand, A may be a load instruction, in which case the destination isn't known until A's CA stage, so B leaves ID when A leaves CA. c) If B is a store inst, and B's src is A's dest, then B leaves the EX when A leaves FPEX. This data hazard can only happen for flop reg's because in the integer pipeline, B would have to wait for A in EX, a structural hazard. d) If A is a cmp (cond code 1) and B is a branch inst (cond code 2), and they refer to the same (int or flop) cc, B finishes ID when A finishes EX. (2/27) */ /* ABOVE HAZARD DISCUSSION WAS VALID WHTN THE TA WORKED FOR SPARC ARCHITECTURE, BUT FOR PISA, IT IS NOT VALID */ /* new method, based on registers (5/15) */ delay = 0; delayed_register = -1; /*LOOKING AT DATA HAZARD SITUATIONS WHILE GOING INTO THE CHILD LOOP */ if (stats_inst_ptr == NULL && tcase == WORST) { if (verbose) { printc ("Here is the register info going into child loop:\n"); print_reg_info (exit_bptr->worst_case->src_needed, exit_bptr->worst_case->src_stage, release_time); } for (i = 0; i < NUMREGS; ++i) {/*BASICALLY WE CHECK WHEN EACH REGISTER IS FREED IN OUTER LOOP AND WHEN IT IS NEEDED IN INNER LOOP, IF MISMATCH, WE HAVE A DATA DELAY */ /*BOOKMARK- No need to add the parameterized stuff here cause the data hazard information will remains the same for ideal cache and non-ideal cache */ inner_src_stage[i] = exit_bptr->worst_case->src_stage[i]; diff = release_time[i] - (path_cycles - path_end_cycles[IF] + 1 + exit_bptr->worst_case->src_needed[i]); if (release_time[i] != NEVER && exit_bptr->worst_case->src_needed[i] != NEVER && diff > 0) { int2reg (i, reg); delayed_register = i; if (verbose) printc ("%s won't be ready in time: delay is %d\n", reg, diff); poss_update (&delay, diff, "5 reg delay"); } } /*BOOKMARK- another thing that we need to do is remove the previous branch mispreditcted variable to false. */ prev_Mispredicted_branch = false; history[num_path_inst].negate = true; prev_MEM_Miss = false; } else if (stats_inst_ptr == NULL && tcase == BEST) { if (verbose) { printc ("Here is the register info going into child loop:\n"); print_reg_info (exit_bptr->best_case->src_needed, exit_bptr->best_case->src_stage, release_time); } for (i = 0; i < NUMREGS; ++i) { inner_src_stage[i] = exit_bptr->best_case->src_stage[i]; diff = release_time[i] - (path_cycles - path_end_cycles[IF] + 1 + exit_bptr->best_case->src_needed[i]); if (release_time[i] != NEVER && exit_bptr->best_case->src_needed[i] != NEVER && diff > 0) { int2reg (i, reg); delayed_register = i; if (verbose) printc ("%s won't be ready in time: delay is %d\n", reg, diff); poss_update (&delay, diff, "5 reg delay"); } } } /*LOOKING AT THE CASE WHERE A FUNCTION IS BEING INSERTED AND WE NEED TO ADD THE DATA HAZARDS FOR CHILD FUNCTION*/ else if (inst_after_func == INSERT_FUNC && tcase == WORST) { if (verbose) { printc ("Here is the register info going into the function:\n"); print_reg_info (called_func->wc_src_needed, called_func->wc_src_stage, release_time); } /*FOR INSERTED FUNCTION, DATA HAZARDS ARE CHECKED, BY COMPARING WHEN REGISTERS ARE AVAILABLE TO WHEN THEY ARE REQUIRED BY CHILD FUNCTION*/ for (i = 0; i < NUMREGS; ++i) { inner_src_stage[i] = called_func->wc_src_stage[i]; diff = release_time[i] - (path_cycles - path_end_cycles[IF] + 1 + called_func->wc_src_needed[i]); if (called_func->wc_src_needed[i] != NEVER && release_time[i] != NEVER && diff > 0) { int2reg (i, reg); delayed_register = i; if (verbose) printc ("%s won't be ready in time: delay is %d\n", reg, diff); poss_update (&delay, diff, "5 reg delay"); } } /* also store the called function's end cache loading behavior 1/29/96 what about loops too??? */ for (i = 0; i < words_per_line; ++i) { inner_end_avail_time[i] = called_func->wc_end_avail_time[i]; inner_fm_end_avail_time[i] = called_func->wc_fm_end_avail_time[i]; } /*BOOKMARK- another thing that we need to do is remove the previous branch mispreditcted variable to false. */ prev_Mispredicted_branch = false; history[num_path_inst].negate = true; prev_MEM_Miss = false; } else if (inst_after_func == INSERT_FUNC && tcase == BEST) { if (verbose) { printc ("Here is the register info going into function:\n"); print_reg_info (called_func->bc_src_needed, called_func->bc_src_stage, release_time); } for (i = 0; i < NUMREGS; ++i) { inner_src_stage[i] = called_func->bc_src_stage[i]; diff = release_time[i] - (path_cycles - path_end_cycles[IF] + 1 + called_func->bc_src_needed[i]); if (called_func->bc_src_needed[i] != NEVER && release_time[i] != NEVER && diff > 0) { int2reg (i, reg); delayed_register = i; if (verbose) printc ("%s won't be ready in time: delay is %d\n", reg, diff); poss_update (&delay, diff, "5 reg delay"); } } } /*IF WE ARE HANDLING AN INNER NODE, FUNCTION OR LOOP, WE HAVE TO GET SOME INFORMATION FROM THE CHILD NODE */ if (stats_inst_ptr == NULL || inst_after_func == INSERT_FUNC) { if (tcase == WORST) { inner_pipeline_time = (inst_after_func == INSERT_FUNC) ? called_func -> wc_pipeline_time : exit_bptr -> worst_case -> pipeline_time; /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ #ifdef __PARAMETRIC_TA__ if ((exit_bptr && exit_bptr->worst_case->need_polynomial) || (called_func && called_func->need_polynomial)) { tmp_term = (struct term*)malloc(sizeof(struct term)); memset(tmp_term , 0, sizeof(struct term)); memory_count += sizeof(struct term); tmp_term->power = 1; tmp_term->coefficient_ptr = ((inst_after_func == INSERT_FUNC) && (called_func->need_polynomial)) ? called_func->polynomial_worst_case : exit_bptr->worst_case->polynomial; if (path_polynomial == NULL) { path_polynomial = new_polynomial(); memory_count += sizeof(struct polynomial); } add_term_structure(path_polynomial, tmp_term); child_overcount_time += inner_pipeline_time; } #endif // __PARAMETRIC_TA__ /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ /*MARKED - I CAN ALSO ADD THE LATENCY VARIABLE FOR FREQUENCY MODEL HERE. BASICALLY I AM ADDING THE MEM LATENCY VARIABLE OF THE CHILD HERE TO THE EXTRA_LATENCY VARIABLE */ /*LET US TRY CALCULATING EQUAITON ALL THE TIME*/ //if (PARAMETERIZE) { extra_latency += (inst_after_func == INSERT_FUNC) ? called_func -> total_latency : exit_bptr -> worst_case -> total_latency; //} /* * also get inner shared time here 2/16/99 */ inner_shared_fm_time = (inst_after_func == INSERT_FUNC) ? called_func->shared_fm_time : exit_bptr->worst_case->shared_fm_time; /* save inner_num_paths for later: we need it after loop (3/7) */ /* here we need to take care of interpath data hazards with the union of paths */ if (inst_after_func == INSERT_FUNC) { for (stage = 0; stage < NUM_STAGES; stage++) { inner_beg_head[stage] = called_func -> wc_beg_head[stage]; inner_end_head[stage] = called_func -> wc_end_head[stage]; } for (i = 0; i < NUMREGS; ++i) /* we only need release time */ { inner_release_time[i] = called_func -> wc_release_time[i]; } } else { for (stage = 0; stage < NUM_STAGES; stage++) { inner_beg_head[stage] = exit_bptr->worst_case->beg_head[stage]; inner_end_head[stage] = exit_bptr->worst_case->end_head[stage]; } for (i = 0; i < NUMREGS; ++i) /* we only need release time */ { inner_release_time[i] = exit_bptr->worst_case->release_time[i]; } } } else /* for BEST case */ { if (inst_after_func == INSERT_FUNC) { inner_pipeline_time = called_func->bc_pipeline_time; /* we need to make sure the child's union has its end cycles flush with the end of the pipeline: one of the stages, usually the WB must have 1 as the end cycles. If none = 1, bring them all down by smallest - 1. (9/26) */ smallest = MAXINT; for (stage = 0; stage < NUM_STAGES; stage++) if (called_func->bc_end_head[stage] && called_func->bc_end_head[stage]->cycles < smallest) smallest = called_func->bc_end_head[stage]->cycles; if (verbose) printc ("smallest # of end cycles is %d\n", smallest); if (smallest > 1) for (stage = 0; stage < NUM_STAGES; ++stage) if (called_func->bc_end_head[stage]) called_func->bc_end_head[stage]->cycles -= smallest-1; for (stage = 0; stage < NUM_STAGES; stage++) { inner_occ_cycles[stage] = called_func->occ_cycles[stage]; if (called_func->bc_end_head[stage] && called_func->bc_beg_head[stage]) { inner_vacant_cycles[stage] = (called_func->bc_pipeline_time + called_func->bc_extra_time - called_func->bc_end_head[stage]->cycles + 1) - called_func->bc_beg_head[stage]->cycles + 1 - inner_occ_cycles[stage]; } else inner_vacant_cycles[stage] = called_func->bc_pipeline_time; inner_beg_head[stage] = called_func -> bc_beg_head[stage]; inner_end_head[stage] = called_func -> bc_end_head[stage]; } for (i = 0; i < NUMREGS; ++i) /* we only need release time */ { inner_release_time[i] = called_func -> bc_release_time[i]; } } else { inner_pipeline_time = exit_bptr->best_case->pipeline_time; /* we need to make sure the child's union has its end cycles flush with the end of the pipeline: one of the stages, usually the WB must have 1 as the end cycles. If none = 1, bring them all down by smallest - 1. (9/26) */ smallest = MAXINT; for (stage = 0; stage < NUM_STAGES; stage++) if (exit_bptr->best_case->end_head[stage] && exit_bptr->best_case->end_head[stage]->cycles < smallest) smallest = exit_bptr->best_case->end_head[stage]->cycles; if (verbose) printc ("smallest # of end cycles is %d\n", smallest); if (smallest > 1) for (stage = 0; stage < NUM_STAGES; ++stage) if (exit_bptr->best_case->end_head[stage]) exit_bptr->best_case->end_head[stage]->cycles -= smallest-1; for (stage = 0; stage < NUM_STAGES; stage++) { inner_occ_cycles[stage]=exit_bptr->best_case->occ_cycles[stage]; if (exit_bptr->best_case->end_head[stage] && exit_bptr->best_case->beg_head[stage]) { inner_vacant_cycles[stage] = (exit_bptr->best_case->pipeline_time + exit_bptr->best_case->extra_time - exit_bptr->best_case->end_head[stage]->cycles + 1) - exit_bptr->best_case->beg_head[stage]->cycles + 1 - inner_occ_cycles[stage]; } else inner_vacant_cycles[stage] = exit_bptr->best_case->pipeline_time; inner_beg_head[stage] = exit_bptr->best_case->beg_head[stage]; inner_end_head[stage] = exit_bptr->best_case->end_head[stage]; } for (i = 0; i < NUMREGS; ++i) /* we only need release time */ { inner_release_time[i] = exit_bptr->best_case->release_time[i]; } } } /*for best case*/ /*NOW PRINTING THE BEGINING AND ENDING PIPELINE INFORMATION FOR THE CHILD LOOP AS WELL AS PIPELINE INFO FOR THIS PATH SO FAR */ if (! only_fetch) { if (verbose) { printc ("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n"); printc ("list of inner loop's union info\n"); for (stage = 0; stage < NUM_STAGES; stage++) for (p = inner_beg_head[stage]; p; p = p -> next) if (p->occupant) printc ("stage %d beg-cycle %d occupant %d\n", stage, p -> cycles, p -> occupant -> number); for (stage = 0; stage < NUM_STAGES; stage++) for (p = inner_end_head[stage]; p; p = p -> next) if (p->occupant) printc ("stage %d end-cycle %d occupant %d\n", stage, p -> cycles, p -> occupant -> number); printc ("going into union, the delay is %d\n", delay); printc (">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); } /* 6. A new way to insert the child loop/function, examining the stages l->r Structural hazard with the current path, data hazard with the current path and the child's beginning pipeline requirements (6/13) */ if (verbose) { printc ("Before the child, here is the end use of the stages:\n"); for (stage = 0; stage < NUM_STAGES; ++stage) printc ("%3d ", (path_end_cycles[stage] != 0) ? (path_cycles -path_end_cycles[stage] + 1) : 0); printc ("\n"); if (tcase == BEST) { print_array (inner_occ_cycles, 0, NUM_STAGES, "child occ cycles"); print_array (inner_vacant_cycles, 0, NUM_STAGES,"child vacant cyc"); } } } /* now find the earliest start of each stage in the child given structural constraints, 1 more than the current path finished */ /*FINDING THE EARLIEST TIME THAT THE CHILD NODE CAN START. */ for (stage = 0; stage < NUM_STAGES; ++stage) if (path_end_cycles[stage] == 0 || (only_fetch && stage > 0)) earliest_start[stage] = 0; else earliest_start[stage] = path_cycles - path_end_cycles[stage] + 2; /* it would be helpful to investigate the pipeline requirements of the child's beginning so that we can understand the effect of any data hazard, making it easier to measure how much a particular stage can compress (6/14) */ for (prev = 0, stage = 1; stage < NUM_STAGES; ++stage) { if (only_fetch) break; if (inner_beg_head[stage] == NULL) { continue; } poss_update (&earliest_start[stage], earliest_start[prev] + inner_beg_head[stage]->cycles - inner_beg_head[prev]->cycles, "child's own beg shape"); prev = stage; } if (verbose) { printc ("ere examining data dependencies, earliest start times:\n"); print_array (earliest_start, 0, NUM_STAGES, ""); } /* next determine the time when child finishes each stage, expressed as cycles in outer path, like end_use */ /*NOW DETERMINE WHEN THE CHILD NODE FINISHES*/ for (stage = 0; stage < NUM_STAGES; ++stage) { if (inner_end_head[stage] == NULL) { child_done[stage] = 0; continue; } /* * to fix segv in sprsin2, need to make sure ptrs * inner_beg_head and inner_end_head are not null 12/9/98 */ if (inner_beg_head[stage] && inner_end_head[stage]) { child_done[stage] = earliest_start[stage] + inner_pipeline_time - inner_end_head[stage]->cycles + 1 - inner_beg_head[stage]->cycles; if (verbose) { printc ("inner shared fm time = %d\n", inner_shared_fm_time); printc ("inner extra time = %d\n", inner_extra_time); printc ("calculating child_done...\n"); printc ("earliest_start[%d] = %d\n", stage, earliest_start[stage]); printc ("inner pipeline time = %d\n", inner_pipeline_time); printc ("inner_end_head[%d] cycles = %d\n", stage, inner_end_head[stage]->cycles); printc ("inner_beg_head[%d] cycles = %d\n", stage, inner_beg_head[stage]->cycles); printc ("thus, child_done[%d] = %d\n", stage, child_done[stage]); } } if (only_fetch) break; /* only 1 iteration necessary if c.o. */ } /* now deal with data hazards going in */ /*DEAL WITH THE DATA HAZARDS THAT HAPPEN DURING THE TRANSITION*/ if (delayed_register != -1) { int2reg (delayed_register, reg); if (verbose) printc ("delayed reg is %d, %s, needed before going to %s\n", delayed_register, reg, stage_name(inner_src_stage[delayed_register])); diff = release_time[delayed_register] + 1 - earliest_start[inner_src_stage[delayed_register]]; if (diff > 0) { earliest_start[inner_src_stage[delayed_register]] += diff; if (verbose) printc ("Start of %s delayed until %d\n", stage_name(inner_src_stage[delayed_register]), earliest_start[inner_src_stage[delayed_register]]); if (diff - inner_vacant_cycles[inner_src_stage[delayed_register]] > 0) { child_done[inner_src_stage[delayed_register]] += diff - inner_vacant_cycles[inner_src_stage[delayed_register]]; } } } /* now for the begin-cycles constraint for the child loop for the remaining stages after the delayed stage, if the occupants match, then delay that later stage as well (6/13) */ /*IF THERE IS A HAZARD DETECTED, THEN MAKE APPROPRIATE CHANGES TO THE CHILD DONE TIME */ if (delayed_register != -1 /* 23 sept 97: avoid segv */ && !only_fetch) { for (stage = inner_src_stage[delayed_register] + 1; stage < NUM_STAGES; ++stage) { if (inner_beg_head[stage] == NULL) continue; if (inner_src_stage[delayed_register] == -1) continue; /* to avoid segv 10/20/97 (Frank) */ if (inner_beg_head[stage]->occupant == inner_beg_head[inner_src_stage[delayed_register]]->occupant) { poss_update (&earliest_start[stage], earliest_start[inner_src_stage[delayed_register]] + inner_beg_head[stage]->cycles - inner_beg_head[inner_src_stage[delayed_register]]->cycles, "later stage also waits"); if (diff - inner_vacant_cycles[stage] > 0) { child_done[stage] += diff - inner_vacant_cycles[stage]; } } } } /* for those stages not affected by data hazard into child, we still need to consider the child's pipeline requirements relative to IF - I don't think I need to do this again (6/14) */ for (stage = 1; stage < NUM_STAGES; ++stage) { if (only_fetch) break; if (inner_beg_head[stage] == NULL) continue; poss_update (&earliest_start[stage], earliest_start[IF] + inner_beg_head[stage]->cycles - inner_beg_head[IF]->cycles, "child's own beg shape"); } if (verbose) { print_array (earliest_start, 0, NUM_STAGES, "child starts "); print_array (child_done, 0, NUM_STAGES, "child finishes"); } /* path_cycles is the largest value of child_done? (6/14) */ if (tcase == BEST) { path_cycles = child_done[0]; for (stage = 1; stage < NUM_STAGES; ++stage) poss_update (&path_cycles, child_done[stage], "path cycles"); } /* 7. Let me explain... We first update the cycle time for our path taking into consideration this inner loop. For each pipeline stage, find the minimum sum of the end-cycles for the path before the inner loop and the beg-cycles for the inner loop This min sum is called "cycle_diff". "old_path_cycles" stores the path's time before we alter it. The inner loop's first inst can begin to fetch at old_path_cycles - cycle_diff + 3 And this means the new value of path_cycles should be old_path_cycles - cycle_diff + 2 + time of the inner loop. Next we need to update path_beg_cycles and beg_occupant for a particular stage if the inner loop has an inst using this stage whereas the outer loop so far has not. We find the time the stage is first occupied relative to when the 1st inst starts to fetch, and add this to the path-time when the 1st inst starts to fetch. (2/23) */ if (tcase == WORST) {/* inner loop has union's l.l. */ /*MAKE SURE THAT AFTER THE CHILD IS ADDED, THE SHAPE OF THE PIPELINE DIDAGRAM IS MAINTAINED */ /* for a particular stage, we need to find the 1st and last occupant and cycles in the union's l.l. (3/10) */ for (stage = 0; stage < NUM_STAGES; stage++) { first_inner[stage] = inner_beg_head[stage]; last_inner[stage] = inner_end_head[stage]; for (p = inner_beg_head[stage]; p; p = p->next) if (p->cycles < first_inner[stage]->cycles) first_inner[stage] = p; for (p = inner_end_head[stage]; p; p = p->next) if (p->cycles < last_inner[stage]->cycles) last_inner[stage] = p; } /* new way to insert child - keep child's shape (7/4) */ if (verbose) printc ("***** ***** ***** ***** ***** ***** *****\n"); earliest_start[IF] = path_cycles - path_end_cycles[IF] + 2; for (stage = 1; stage < NUM_STAGES; ++stage) { if (first_inner[stage] == NULL) { earliest_start[stage] = 0; continue; } earliest_start[stage] = earliest_start[IF] + first_inner[stage]->cycles - first_inner[IF]->cycles; } if (verbose) { printc ("If the child had its way, here's is earliest start:\n"); print_array (earliest_start, 0, NUM_STAGES, ""); } diff = 0; for (stage = 0; stage < NUM_STAGES; ++stage) { if (earliest_start[stage] == 0 || path_end_cycles[stage] == 0) continue; poss_update (&diff, (path_cycles - path_end_cycles[stage] + 2) - earliest_start[stage], ""); } /* * We can't have any data or structural hazard in cache only. */ if (only_fetch) diff = delay = 0; if (verbose) { printc ("structural hazard delay is %d\n", diff); printc ("data hazard delay is %d\n", delay); } for (stage = 0; stage < NUM_STAGES; ++stage) { if (earliest_start[stage] == 0) continue; earliest_start[stage] += larger (diff, delay); } if (verbose) { printc ("in reality, earliest start is\n"); print_array (earliest_start, 0, NUM_STAGES, ""); } /* Now we really know when the child can safely begin entering * the pipeline stages, and in particular the IF stage. * update inner [fm] end avail times. 3/20/96 */ /*MARKED - WRAP AROUND CACHE INFO .. IGNORED WRT FREQUENCY MODEL*/ #ifdef TRASH if (wrap_around) { for (i = 0; i < words_per_line; ++i) { inner_end_avail_time[i] += earliest_start[0] - 1; inner_fm_end_avail_time[i] += earliest_start[0] - 1; } if (verbose) printc ("inner end avail time advanced by %d\n", earliest_start[0] - 1); } #endif for (stage = 0; stage < NUM_STAGES; ++stage) { if (earliest_start[stage] == 0) { child_done[stage] = 0; continue; } if (first_inner[stage] && last_inner[stage]) child_done[stage] = earliest_start[stage] + inner_pipeline_time - first_inner[stage]->cycles - last_inner[stage]->cycles + 1; } /*BY NOW THE CHILD DONE TIME IS KNOWN*/ if (verbose) print_array (child_done, 0, NUM_STAGES, "child_done"); /* * Keep times when path finishes each stage because the child * is not an instruction in this path. (4/19/96) */ for (stage = 0; stage < NUM_STAGES; ++stage) done[stage] = child_done[stage]; old_path_cycles = child_done[0]; for (stage = 1; stage < NUM_STAGES; ++stage) poss_update (&old_path_cycles, child_done[stage], ""); if (verbose) { printc ("perhaps path_cycles should be %d\n", old_path_cycles); printc ("***** ***** ***** ***** ***** ***** *****\n"); } /* ============================================================== */ path_cycles = old_path_cycles; /*KEEP TRACK OF ANY EXTRA TIME THAT IS ACCUMULATED BY INNER NODE */ inner_extra_time = (inst_after_func == INSERT_FUNC) ? called_func -> extra_time : exit_bptr -> worst_case -> extra_time; /* * also capture the inner shared fm time 2/9/99 */ inner_shared_fm_time = (inst_after_func == INSERT_FUNC) ? called_func->shared_fm_time : exit_bptr->worst_case->shared_fm_time; /* accumulate the inner's first line delay with that of the current path so far 1/30/96 */ /*MARKED - CODE BELOW CONSIDERS WRAP AROUND CACHE... WE WILL IGNORE THAT FOR THE TIME BEING */ #ifdef TRASH if (wrap_around) { /*COPYING A LOT OF STUFF THAT HAS TO DEAL WITH WRAP AROUND CACHES*/ if (inst_after_func == INSERT_FUNC) { inner_beg_cache_line = called_func->wc_beg_cache_line; inner_end_cache_line = called_func->wc_end_cache_line; inner_first_line_delay = called_func->first_line_delay; inner_delay_after_fm = called_func->delay_after_fm; inner_last_line_delay = called_func->last_line_delay; inner_beg_word_num = called_func->wc_beg_word_num; inner_fm_dead_cycles = called_func->fm_dead_cycles; inner_last_fm = called_func->last_fm; inner_finish_load = called_func->wc_finish_load; inner_delay_after_fm_head = called_func->wc_delay_after_fm_head; } else { inner_beg_cache_line = exit_bptr->worst_case->beg_cache_line; inner_end_cache_line = exit_bptr->worst_case->end_cache_line; inner_first_line_delay = exit_bptr->worst_case->first_line_delay; inner_delay_after_fm = exit_bptr->worst_case->delay_after_fm; inner_last_line_delay = exit_bptr->worst_case->last_line_delay; inner_beg_word_num = exit_bptr->worst_case->beg_word_num; inner_fm_dead_cycles = exit_bptr->worst_case->fm_dead_cycles; inner_last_fm = exit_bptr->worst_case->last_fm; inner_finish_load = exit_bptr->worst_case->finish_load; inner_delay_after_fm_head = exit_bptr->worst_case->delay_after_fm_head; } /* for the moment at least, this is the most recent fm (3/1/96) */ fm_inst = inner_last_fm; /* * inner end avail time should also be based on child_done[0]. * For example, if the last inst in inner loop is the miss, then * the inner end avail times need to reflect this time. 2/22/96 * But the last inst in loop may not be the miss. */ if (inst_after_func != INSERT_FUNC) for (i = 0; i < words_per_line; ++i) { inner_end_avail_time[i] = exit_bptr->worst_case->end_avail_time[i] + child_done[0] + inner_finish_load - exit_bptr->worst_case->end_avail_time[0]; inner_fm_end_avail_time[i] = exit_bptr->worst_case->fm_end_avail_time[i] + child_done[0] - exit_bptr->worst_case->fm_end_avail_time[0]; } if (verbose) { printc ("Inner node info:\n"); printc (" inner pipeline time : %d\n", inner_pipeline_time); printc (" inner extra time : %d\n", inner_extra_time); printc (" inner shared fm time : %d\n", inner_shared_fm_time); printc (" inner 1st line delay : %d\n", inner_first_line_delay); printc (" inner delay after fm : %d\n", inner_delay_after_fm); printc (" inner last line delay: %d\n", inner_last_line_delay); printc (" inner fm dead cycles : %d\n", inner_fm_dead_cycles); printc (" inner last fm : %d\n", inner_last_fm); print_array (inner_beg_word_num, 0, words_per_line, " inner beg word num "); printc (" inner end cache line : %d\n", inner_end_cache_line); print_array (inner_end_avail_time, 0, words_per_line, " inner end avail time "); printc (" inner finish load : %d\n ", inner_finish_load); print_delay_after_fm_list (inner_delay_after_fm_head); } /* merge child's delay after fm list with path's own 3/18/96 */ for (inner_delay_after_fm_temp = inner_delay_after_fm_head; inner_delay_after_fm_temp; inner_delay_after_fm_temp = inner_delay_after_fm_temp->next) { memory_count += 12; delay_after_fm_temp = (struct delay_after_fm_node *) malloc (sizeof (struct delay_after_fm_node)); if (verbose) printc ("adding inner inst %d to path's delay after fm list\n", inner_delay_after_fm_temp->inst); delay_after_fm_temp->inst = inner_delay_after_fm_temp->inst; delay_after_fm_temp->cycles = inner_delay_after_fm_temp->cycles; delay_after_fm_temp->next = delay_after_fm_head; delay_after_fm_head = delay_after_fm_temp; } /* Detect if we can reduce inner first line delay. This * happens when the child starts after current cache line * is loaded into cache, or just experiences part of penalty. */ /* If 1st inst in child is same cache line as delay slot, * it only has to wait for itself, not the whole line. 2/1/96 * but I don't know what inst number that inst is !!!!! */ /*MARKED- THE CODE BELOW CALCULATES THE AVAIL TIME... CHECK OUT WHAT IT IS.. */ /*BASICALLY WHAT WE ARE DOING IS THIS- IF THE INNER LOOP/FUNC CACHE LINE IS THE SAME AS THE CACHE LINE OF OUTER NODE, WE HAVE TO CONSIDER ADDING OR SUBTRACTING DELAYS FOR WRAP AROUND CACHE */ if (cache_line == inner_beg_cache_line) { /* think more about this - if same cache line and child's 1st not miss, it doesn't know the 1st avail time, but outer should know. */ diff = 0; for (i = 0; i < words_per_line && inner_beg_word_num[i] != -1; ++i) { word_num = inner_beg_word_num[i]; /* * If the most recent miss is a firstmiss or an always miss, * look up the appropriate avail time array. 2/23/96 */ if (avail_time[word_num] > fm_avail_time[word_num]) { if (i == 0) diff += avail_time[word_num] - earliest_start[IF]; else diff += avail_time[word_num] - avail_time[word_num - 1] - 1; } else if (iterations_handled == 0) /* that's when fm=m 3/2/96*/ { if (i == 0) diff += fm_avail_time[word_num] - earliest_start[IF]; else diff += fm_avail_time[word_num] - fm_avail_time[word_num - 1] - 1; } if (verbose) { printc ("current cache line = %d\n", cache_line); printc ("inner beg cache line = %d\n", inner_beg_cache_line); printc ("1st child inst word num = %d\n", word_num); printc ("1st child inst avail time at %d\n", avail_time[word_num]); printc ("1st child inst FM avail time at %d\n", fm_avail_time[word_num]); printc ("earliest start [IF] = %d\n", earliest_start[IF]); printc ("diff = %d\n", diff); } } } /* The other alternative is that the current cache line differs * from the cache line at beginning of the child. This means * we are jumping to a different program line, and the child has * to wait for the outer's cache line to come in before executing. */ else { /* * The most recent miss may be from an inst categorized as * either a miss or first miss. 2/27/96 hurts funloop though * * don't even consider fm_last_word_loaded if this * is not the 1st iteration 3/12/96 */ if (iterations_handled > 0) { diff = last_word_loaded - earliest_start[IF] + 1; } else { diff = larger (last_word_loaded, fm_last_word_loaded) - earliest_start[IF] + 1; } if (verbose) { printc ("current cache line = %d\n", cache_line); printc ("inner beg cache line = %d\n", inner_beg_cache_line); printc ("last word loaded = %d\n", last_word_loaded); printc ("fm last word loaded = %d\n", fm_last_word_loaded); printc ("earliest start [IF] = %d\n", earliest_start[IF]); printc ("diff = %d\n", diff); } } if (diff > 0 && diff < inner_first_line_delay) { if (verbose) printc ("inner first line delay reduced from %d to %d\n", inner_first_line_delay, inner_first_line_delay - (max_jumping_delay - diff)); inner_first_line_delay -= max_jumping_delay - diff; poss_update (&inner_first_line_delay, 0, "inner first line delay can't be negative"); } else if (diff <= 0) { inner_first_line_delay = 0; if (verbose) printc ("inner first line delay set to 0\n"); } /*ASSUMPTION IS THAT THE TIMES BELOW ARE TAKEN CARE OF WRT THE FREQUENCY MODEL */ path_ptr->delay_after_fm += inner_delay_after_fm; path_ptr->last_line_delay += inner_last_line_delay; path_ptr->fm_dead_cycles += inner_fm_dead_cycles; if (verbose) { printc ("Adding %d from inner fm dead cyc\n", inner_fm_dead_cycles); printc ("inner beg cache line = %d\n", inner_beg_cache_line); printc ("adding %d from inner first line delay\n", inner_first_line_delay); printc ("adding %d from inner delay after fm\n", inner_delay_after_fm); printc ("adding %d from inner last line delay\n", inner_last_line_delay); } } /* end of "if (wrap_around)" */ #endif /*MARKED - THE IF (WRAP_AROUND) CONTROL BLOCK IS NOT LOOKED INTO TO FIND ANY WAY TO PUT THE FREQUENCY MODEL IN ...AS OF NOW */ if (verbose) { printc ("time of inner path is %d\n", inner_pipeline_time); printc ("extra time of inner path is %d\n", inner_extra_time); printc ("extra shared fm time is %d\n", inner_shared_fm_time); } old_path_cycles = path_cycles; cycle_diff = path_end_cycles[IF] + first_inner[IF] -> cycles; for (stage = IF+1; stage < NUM_STAGES; stage++) { if (first_inner[stage] == NULL) continue; /* stage may be vacant in loop (3/13) */ if (path_end_cycles[stage] + first_inner[stage] -> cycles < cycle_diff) { cycle_diff = path_end_cycles[stage] + first_inner[stage] -> cycles; } } if (verbose) printc ("cycle diff is %d\n", cycle_diff); //#ifdef TRASH weird_fix1(path_end_cycles, cycle_diff, &path_cycles); //#endif if (naive_too) { path_ptr->wc_pc_nai_time += (inst_after_func == INSERT_FUNC) ? called_func -> wc_pc_nai_time : exit_bptr->worst_case->pc_nai_time; path_ptr->wc_pipe_nai_time += (inst_after_func == INSERT_FUNC) ? called_func -> wc_pipe_nai_time : exit_bptr->worst_case->pipe_nai_time; if (verbose) printc ("path's pipe nai time = %d\n", path_ptr->wc_pipe_nai_time); } /* add time here */ /*THE TIMES BEING RECORDED BLOW MUST NOW TAKE INTO ACCOUNT THE FREQUENCY MODEL */ /*extra_fh_time, extra_fm_time HAVE NEW MEANING WHEN IN PARAMETER MODE, THEY WILL COUNT THE NUMBER OF TIMES MISS LATENCY MUST BE ADDED OR SUBTRACTED FROM THE PATH CYCLES */ if (PARAMETERIZE) path_ptr->extra_fh_time += inner_still_somemiss; else path_ptr->extra_fh_time += inner_still_somemiss*delay_list[0]; if (verbose) printc ("adding %d to path's extra time: f or m not yet i\n", inner_still_somemiss*delay_list[0]); if (!PARAMETERIZE) path_cycles += inner_new_miss2hit * delay_list[0]; /*MOVING THIS OUTSIDE THE PARAMETERIZE BLOCK */ extra_latency += inner_new_miss2hit; if (verbose) printc ("I just added %d to path_cycles: new f->i or m->i\n", inner_new_miss2hit * delay_list[0]); if (PARAMETERIZE) path_ptr->extra_fm_time += Min(fut_trans, inner_extra_time); else path_ptr->extra_fm_time += Min (fut_trans * delay_list[0], inner_extra_time); /* * also add the shared fm time 2/9/99 */ /*ASSUMPTION- INNER SHARED TIME CONSIDERS THE FREQUENCY MODEL */ path_ptr->shared_fm_time += inner_shared_fm_time; if (verbose) { printc ("I just added %d to the path's extra time\n", Min (fut_trans * delay_list[0], inner_extra_time)); printc ("which is the min of fut_trans*delay_list[0] = %d and", fut_trans * delay_list[0]); printc (" inner extra time = %d\n", inner_extra_time); } /* * find out what extra fm time was 2/19/99 */ inner_extra_fm_time = (inst_after_func == INSERT_FUNC) ? called_func -> extra_fm_time : exit_bptr -> worst_case -> extra_fm_time; if (verbose) printc ("inner extra fm time = %d\n", inner_extra_fm_time); /* * fix underestimation to spr2a for fm that are "shared" but * not counted as extra 2/19/99 */ /*MAY AHVE TO REPLACE THE PATH_CYCLES */ diff = inner_shared_fm_time - inner_extra_fm_time; if (iterations_handled > 0 && diff > 0) { extra_latency += diff; if (!PARAMETERIZE) path_cycles +=diff; if (verbose) printc ("just added %d from shared fm's not counted" " already\n", diff); } if (PARAMETERIZE) { diff = Min (inner_extra_time, (inner_ff_trans - fut_trans)); } else { diff = Min (inner_extra_time, (inner_ff_trans - fut_trans) * delay_list[0]); path_cycles += diff; } extra_latency += diff; /* I think it's also nec to add this inner extra time to my avail time arrays 2/9/96 */ #ifdef TRASH if (wrap_around) for (i = 0; i < words_per_line; ++i) { inner_end_avail_time[i] += diff; inner_fm_end_avail_time[i] += diff; } #endif if (verbose) { printc ("I just added %d to path_cycles from inner f->f\n", diff); printc ("which is min of (inner ff trans - fut trans)*dl[0] = %d ", (inner_ff_trans - fut_trans) * delay_list[0]); printc (" and inner_extra_time = %d\n", inner_extra_time); printc ("And I added %d to inner [fm] avail time entries\n", diff); } /* we also need to update path_end_cycles so that this penalty cannot be taken away by an inerted function or loop (4/27) */ /*In an attempt to cpture the correct effects of the code below I am using the compare equations fucntion */ done_once = false; for (stage = 0; stage < NUM_STAGES; ++stage) { if (PARAMETERIZE) { /* FIXIT- TRY TO PARAMETERIZE CODE IN THE ELSE */ } else { if (path_end_cycles[stage] >= (inner_ff_trans - fut_trans) * delay_list[0]) path_end_cycles[stage] -= (inner_ff_trans - fut_trans) * delay_list[0]; } } for (i = 0; i < NUM_STAGES; ++i) { if (path_beg_cycles[i] == 0 && first_inner[i]) { path_beg_cycles[i] = first_inner[i] -> cycles - first_inner[IF] -> cycles + old_path_cycles - cycle_diff + 3; beg_occupant[i] = first_inner[i] -> occupant; } if (last_inner[i]) { /* don't update unused stage if we returned from a func (how?)*/ end_occupant[i] = last_inner[i] -> occupant; path_end_cycles[i] = last_inner[i] -> cycles; } } } else /* for BEST case we need the last to begin and first to end */ { for (stage = 0; stage < NUM_STAGES; stage++) { first_inner[stage] = inner_beg_head[stage]; last_inner[stage] = inner_end_head[stage]; for (p = inner_beg_head[stage]; p; p = p -> next) if (p -> cycles > first_inner[stage] -> cycles) /* NB > */ first_inner[stage] = p; for (p = inner_end_head[stage]; p; p = p -> next) if (p -> cycles > last_inner[stage] -> cycles) /* NB > */ last_inner[stage] = p; } inner_extra_time = (inst_after_func == INSERT_FUNC) ? called_func -> bc_extra_time : exit_bptr -> best_case -> extra_time; if (verbose) { printc ("time of inner path is %d\n", inner_pipeline_time); printc ("extra time of inner path is %d\n", inner_extra_time); printc ("extra shared fm time is %d\n", inner_shared_fm_time); } if (naive_too) { path_ptr->bc_pc_nai_time += (inst_after_func == INSERT_FUNC) ? called_func->bc_pc_nai_time : exit_bptr->best_case->pc_nai_time; path_ptr->bc_pipe_nai_time += (inst_after_func == INSERT_FUNC) ? called_func->bc_pipe_nai_time: exit_bptr->best_case->pipe_nai_time; if (verbose) printc ("path's bc pipe nai time = %d\n", path_ptr->bc_pipe_nai_time); } /* add time here for best case */ /* extra time means cycles we need to subtract from all iterations after the first. If this is the first iteration, find the extra time so the tbc can subtract it from later iter (6/15) */ path_ptr->bc_extra_time += inner_ff_trans * delay_list[0]; if (verbose) printc ("I just added %d to extra time : f->f trans\n", inner_ff_trans * delay_list[0]); /* we also need to update path_end_cycles so that this penalty cannot be taken away by an inerted function or loop */ for (stage = 0; stage < NUM_STAGES; ++stage) { if (path_end_cycles[stage] >= (inner_ff_trans - fut_trans) * delay_list[0]) path_end_cycles[stage] -= (inner_ff_trans - fut_trans) * delay_list[0]; } for (i = 0; i < NUM_STAGES; ++i) { if (path_beg_cycles[i] == 0 && first_inner[i]) { path_beg_cycles[i] = earliest_start[i]; beg_occupant[i] = first_inner[i] -> occupant; } if (last_inner[i]) { /* don't update unused stage if we returned from a func (how?)*/ end_occupant[i] = last_inner[i] -> occupant; path_end_cycles[i] = last_inner[i] -> cycles; } } } if (verbose) { printc ("\nextra fh time in the path so far is %d\n", path_ptr->extra_fh_time); printc ("extra fm time in the path so far is %d\n", path_ptr->extra_fm_time); global_info (' '); print_path_fields (path_ptr, tcase); } inst_after_loop = 1; if (stats_inst_ptr == NULL) { inst_after_child_loop = true; break; /* in case of inner loop, we have no inst to process */ } } /* 8. Function calls need to be handled in a similar way as we did the inner loops. (started 3/23) */ if (strcmp (stats_inst_ptr -> func, "") != 0) { /*IF STATS_INST_PTR IS NOT NULL AND THIS INSTR IS A FUNC CALL, GET READY TO SERVICE THAT FUNC */ if (verbose) printc ("\n This inst is a CALL\n"); called_func = Locate_Child_Node (stats_inst_ptr->func, stats_inst_ptr->instance, node_ptr->child); if (called_func == NULL) { error (" But I can't find that function!"); } if (verbose) printc ("This instruction calls %s\n", called_func->name); /* |||||||||||||||||||||||||||||||||||||||||||||||||||||||| */ /* We also need a child first hit list (4/28) */ /* This code also goes for loops in step 2b, but there we can add the times without having to wait. */ if (tcase == WORST) Copy_Cache_List (&cnode_fh_list, called_func->exit_list->worst_case->fh_list); else Copy_Cache_List (&cnode_fh_list, called_func->exit_list->best_case->fh_list); Update_Cache_List(cnode_fh_list); inner_still_somemiss = Determine_Num_Still_SomeMiss(*fh_list, cnode_fh_list, tcase); if (verbose) printc("# of inner function f or m that are still some miss = %d\n", inner_still_somemiss); /* don't update path's extra time until we insert the function: */ /* path_ptr->extra_time += inner_still_somemiss*delay_list[0]; */ inner_new_miss2hit = Determine_Num_New_Miss2Hit(*fh_list, cnode_fh_list, tcase); /* now update the fh_list: */ if (tcase == WORST) Union_Cache_List(fh_list, called_func->exit_list->worst_case->fh_list); else Union_Cache_List(fh_list, called_func->exit_list->best_case->fh_list); if (verbose) printc ("# of inner function f or m that are new first hits = %d\n", inner_new_miss2hit); /* don't update path_cycles yet: */ /* path_cycles += inner_new_miss2hit * delay_list[0]; */ /* |||||||||||||||||||||||||||||||||||||||||||||||||||||||| */ if (tcase == WORST) Copy_Cache_List (&cnode_fm_list, called_func->exit_list->worst_case->fm_list); else Copy_Cache_List (&cnode_fm_list, called_func->exit_list->best_case->fm_list); Update_Cache_List(cnode_fm_list); /* I think we need a *cumulative* total of inner_ff_trans (4/22) */ inner_ff_trans = Determine_Num_FirstMiss_FM_Trans (*fm_list, cnode_fm_list, tcase, &fut_trans); path_ptr->fm_encountered += inner_ff_trans; if (verbose) printc ("about to call Union_Cache_List for the fm_list\n"); if (tcase == WORST) Union_Cache_List(fm_list, called_func->exit_list->worst_case->fm_list); else Union_Cache_List(fm_list, called_func->exit_list->best_case->fm_list); if (verbose) { printc ("I have encountered %d f->f transitions so far.\n", inner_ff_trans); printc ("I have also seen %d future f-f transitions.\n", fut_trans); } inst_after_func = FUNC_WAIT; /* data hazards handled in steps 5,6 */ } /* 9. For each instruction, find its condition code (1 for a compare, 2 for a branch and 0 otherwise) and also keep track of whether it's a flop. */ inst_cond_code[num_path_inst] = inf_inst_ptr -> cond_code; inst_data_type[num_path_inst] = is_flop(inf_inst_ptr -> inst) ? FLOP : INTEGER; /* prev_cache_line is the cache line containing the prev inst. */ /*MARKED- MORE WRAP_AROUND STUFF THAT IS IGNORED FOR THE TIME BEING */ #ifdef TRASH if (wrap_around) { if (num_path_inst > 0) prev_cache_line = cache_line; cache_line = inf_inst_ptr->prog_offset / words_per_line; if (inner_end_cache_line == -1 && prev_cache_line != -1 && prev_cache_line != cache_line) jumping = YES; else if (inner_end_cache_line != -1 && inner_end_cache_line != cache_line) jumping = YES; else jumping = NO; } #endif if (verbose) printc ("-------------------------------------------------\n"); /*BOOKMARK- no more annulment */ /*if (inf_inst_ptr->number == inst2annul) printc ("I need to be annulled\n");*/ if (verbose) printc ("inst # %d (%d) %s\t", inf_inst_ptr -> number, inf_inst_ptr -> prog_offset, inf_inst_ptr->inst); if (jumping && verbose) printc ("JUMPING\t"); if (jumping && ! wrap_around) error ("Time_Path(): jumping but wrap_around not defined!\n"); /* 10. Find out the inst category so we can assess any applicable penalty. */ /*FINDING THE CACHE CATEGORIZATION FOR CURRENT INSTRUCTION */ if (tcase == WORST) { switch (stats_inst_ptr->cat_list->worst_case) { case Firstmiss_INST_CATEGORY : { path_ptr->last_miss_cat = 'f'; if (verbose) printc ("First Miss\n"); /* if a first miss (or miss) ever becomes a first hit, treat as hit in pipeline and add delay_list[0] to extra_time and append this inst to fh list (4/28) */ /*IT MAY HAPPEN THAT CACHE CATEGORIZATIONS CHANGE DURING THE ANALYSIS BELOW, IF A fm TURNS INTO A fh, WE ADD A MISS PENALTY TO EXTRA TIME */ if (SomeMiss_FirstHit_Trans(stats_inst_ptr->cat_list, tcase)) { assess_penalty = false; if (PARAMETERIZE) path_ptr->extra_fh_time += 1; else path_ptr->extra_fh_time += delay_list[0]; Add_Block_To_Cache_List (stats_inst_ptr->line_number, stats_inst_ptr->inst_number, node_ptr->instance, stats_inst_ptr->cat_list->best_case, stats_inst_ptr->cat_list->worst_case, stats_inst_ptr->cat_list, fh_list); break; } /* We have to investigate a possible f->f transition. In this case we treat it as a hit here in the pipeline, but put the miss penalty "on the side". Time_Worst_Case will need to take this penalty away for all iter after the 1st. (4/13) */ /* need to call other Cache functions as in Time_Block (4/17) */ /*WE ALSO CHECK FOR A fm->fm TRANSITION. IF FOUND, THE fm IS A HIT*/ in_cache = Inst_In_Cache(*fm_list, stats_inst_ptr->line_number); if (in_cache) { assess_penalty = false; } else { /*IF NOT A TRANSITION, COUNT THE FIRST MISS*/ count_on_side = common_fm(inf, node_ptr, path_ptr, inf_inst_ptr); ++(path_ptr->fm_encountered); if (verbose) printc ("I just encountered a fm in Time_Path\n"); if (ever_transition('f','f',stats_inst_ptr->cat_list, tcase)) { if (verbose) printc ("We will have a f->f transition\n"); assess_penalty = false; last_fm_f2f = YES; pending_dead_cycles = 0; if (PARAMETERIZE) path_ptr->extra_fm_time +=1; else path_ptr->extra_fm_time += delay_list[0]; /* * new way to count this ff with shared for spr2a 2/18/99 */ if (! count_on_side) { if (verbose) printc ("counting a f->f with the shared; now %d cycles\n", path_ptr->shared_fm_time); if (PARAMETERIZE) path_ptr->shared_fm_time += 1; else path_ptr->shared_fm_time += delay_list[0]; } } /*bruhaha*/ else { assess_penalty = (fm_status == MISS) ? true : false; last_fm_f2f = NO; } /* * If 2 paths share the same first miss, it's possible that * we should count this miss penalty on the side. This * was determined when we called common_fm at the beginning * of Time_Path. 2/9/99 */ if (count_on_side) { assess_penalty = false; /*path_ptr->extra_fm_time += delay_list[0];*/ if (PARAMETERIZE) path_ptr->shared_fm_time += 1; else path_ptr->shared_fm_time += delay_list[0]; if (verbose){ printc ("Shar d first miss among paths based on common_fm()\n"); printc ("path d shared fm time is now %d\n", path_ptr->number, path_ptr->shared_fm_time); } } Add_Block_To_Cache_List(stats_inst_ptr->line_number, stats_inst_ptr->inst_number, node_ptr->instance, stats_inst_ptr->cat_list->best_case, stats_inst_ptr->cat_list->worst_case, stats_inst_ptr->cat_list, fm_list); } /* Do something similar for fm - in twc we need ability * to see a first miss near end of first iter affecting inst * at beginning of 2nd iter. 2/5/96 * But notice we are not adding any delay here, so we assume * current word is immediately ready (extra time figured aside). * That's why we have delay_list[i] - delay_list[0] below. */ /*MARKED- MORE WRAP AROUND CODE NOT ANALYZED FOR PARAMETERIZATION */ #ifdef TRASH if (wrap_around) { curr_time = (num_path_inst > 0) ? end_use[num_path_inst - 1][IF] + 1 : 1; word_num = inf_inst_ptr -> prog_offset % words_per_line; /* we could delay if a fm follows a fm 2/8/96 */ if (jumping) { diff = largest_entry (fm_avail_time, 0, words_per_line) - curr_time + 1; if (verbose) printc ("f->f jump diff = %d\n", diff); poss_update (&diff, 0, "diff can't be negative"); path_ptr->delay_after_fm += diff; } /* how about jumping from a first hit to a first miss: */ if (jumping) { diff = largest_entry (avail_time, 0, words_per_line) - curr_time + 1; if (verbose) printc ("i,f jump diff = %d\n", diff); poss_update (&diff, 0, "diff can't be negative"); /* * where do I put this value ? */ i2f_jump_delay = diff; } fm_avail_time[word_num] = curr_time; next_word_num = (word_num % 2 == 0) ? (word_num + 1) : (word_num - 1); fm_avail_time[next_word_num] = curr_time + delay_list[1] - delay_list[0]; for (i = 2, next_word_num = word_num + ((word_num % 2 == 0) ? 2:1); i < words_per_line; ++i, ++next_word_num) { next_word_num %= words_per_line; fm_avail_time[next_word_num] = curr_time + delay_list[i] - delay_list[0]; } if (iterations_handled == 0 && assess_penalty) for (i = 0; i < words_per_line; ++i) { avail_time[i] = fm_avail_time[i] + delay_list[0]; fm_avail_time[i] += delay_list[0]; /*2/15/96*/ } if (verbose) print_array (fm_avail_time, 0, words_per_line, "fm avail time"); fm_last_word_loaded = largest_entry (fm_avail_time, 0, words_per_line); fm_inst = inf_inst_ptr->number; } /*if (wrap_around) */ #endif break; } case Firsthit_INST_CATEGORY : { /*IF WE FIND A FIRST HIT WE DO FOLLOWING... FIRST INCREMENT NUMBER OF FIRST HIT ENCOUNTERED */ if (verbose) printc ("First Hit \n"); ++(path_ptr->fh_encountered); /*IF ALL fh MUST BE TREATED AS MISSES DO SO */ if (fh_status == MISS) assess_penalty = true; else { /*ELSE TREAT AS MISS IF ALREADY IN BLOCK LIST */ if (Is_In_Block_List (inf_bptr->block_number, active_fh_list)) { assess_penalty = true; } else { /*ELSE IT IS HIT */ assess_penalty = false; Append_Block_Blocklist (inf_bptr->block_number, &active_fh_list); } } /* * When a first hit is treated as a miss, we need to set avail * times. 2/22/96 */ /*MARKED - MORE WRAP AROUND CACHE STUFF */ #ifdef TRASH if (wrap_around && assess_penalty) { curr_time = (num_path_inst > 0) ? end_use[num_path_inst - 1][IF] + 1 : 1; word_num = inf_inst_ptr -> prog_offset % words_per_line; fh_order_of_fill = find_order_of_fill (word_num); for (i = 0; i < words_per_line; ++i) avail_time[i] = curr_time + delay_list[fh_order_of_fill[i]]; last_word_loaded = largest_entry(avail_time, 0, words_per_line); if (verbose){ printc ("curr time = %d\n", curr_time); printc ("word num = %d\n", word_num); print_array (fh_order_of_fill, 0, words_per_line, "fh order of fill"); print_array (avail_time, 0, words_per_line, "avail times"); } } #endif break; } case Miss_INST_CATEGORY : { /*HANDLING ALWAYS MISS INSTRUCTIONS */ path_ptr->last_miss_cat = 'm'; if (verbose) printc ("Miss \n"); if (pipeline_only) { assess_penalty = true; break; /* less work to do: all inst are miss */ } /* if a (first miss or) miss ever becomes a first hit, treat as hit in pipeline and add delay_list[0] to extra_time and append this inst to fh list (4/28) */ /*BELOW WE HAVE CODE TO HANDLE THE ALWAYS MISS->FIRST HIT TRANSITION OF SOME INSTRUCTIONS */ if (SomeMiss_FirstHit_Trans(stats_inst_ptr->cat_list, tcase)) { assess_penalty = false; if (PARAMETERIZE) path_ptr->extra_fh_time +=1; else path_ptr->extra_fh_time += delay_list[0]; Add_Block_To_Cache_List (stats_inst_ptr->line_number, stats_inst_ptr->inst_number, node_ptr->instance, stats_inst_ptr->cat_list->best_case, stats_inst_ptr->cat_list->worst_case, stats_inst_ptr->cat_list, fh_list); break; } /* more from Time_Block: (4/21) */ same_cache_line = false; /*CHECK OUT THE IMPORTANT COMMENT BELOW */ /* Checking for group miss. * If timing the 1st instruction in a block, and that * inst is a miss, then check to see if the last * instruction from the previous is in the same cache * line. If so, then treat this instruction as a hit. * If the previous block represents a loop, then for * every predicate block (to the current block) that * is in the loop, the last instruction must be in the * same cache line. None of these last instructions * can be a function call. */ if (inf_inst_ptr == inf_bptr->instruct_list) { prev_block_ptr = Find_Prev_Path_Block(inf_bptr->block_number, path_ptr->path_list); if (prev_block_ptr && ! prev_block_ptr->is_loop_node) { prev_block_inf_ptr = Locate_Inf_Block(inf, prev_block_ptr->block_number); if (prev_block_inf_ptr) { pblock_linst = Find_Last_Inst_Num( prev_block_inf_ptr->instruct_list); pblock_linst_ptr = Locate_Stats_Inst( node_ptr->instruct_list, pblock_linst); if ((pblock_linst_ptr) && (Inst_Same_Cache_Line(stats_inst_ptr, pblock_linst_ptr)==true) && (strcmp(pblock_linst_ptr->func, "") == 0) ) same_cache_line = true; } } else if (prev_block_ptr) { pred_blocklist = Find_Pred_Blocklist( (block_element*)( node_ptr->inf_ptr ), inf_bptr->block_number) ; inf_loop_ptr = Find_Loop(node_ptr->inf_ptr, prev_block_ptr->block_number); /* Create node name loop */ itoa (inf_loop_ptr->number, loop_number); strcpy(loop_name, Loop_PREFIX); strcat(loop_name, " "); strcat(loop_name, loop_number); same_cache_line = true; for (block_ptr = pred_blocklist; block_ptr && same_cache_line; block_ptr = block_ptr->next) { if (Is_In_Block_List(block_ptr->block_number, inf_loop_ptr->block_list)==true) { prev_inf_block_ptr = Locate_Inf_Block( node_ptr->inf_ptr, block_ptr->block_number); max_inst_num = Find_Max_Inst( prev_inf_block_ptr->instruct_list); if (max_inst_num >= 0) { cnode = Locate_Child_Node(loop_name, node_ptr->instance, node_ptr->child); inst_ptr2 = Locate_Stats_Inst(cnode->instruct_list, max_inst_num); if (inst_ptr2) /* cure segv sym 4/20/99 */ same_cache_line = ( (Inst_Same_Cache_Line( stats_inst_ptr, inst_ptr2)) && (strcmp(inst_ptr2->func, "") == 0) ); } else same_cache_line = false; } } } } /*MARKED - MORE WRAP AROUND CACHE STUFF */ if (! same_cache_line) { #ifdef TRASH if (wrap_around) { /* We read the access times to find when words will be * loaded into cache. The previous instruction finished * fetching at end_use[num_path_inst - 1][IF] */ curr_time = (num_path_inst > 0) ? end_use[num_path_inst - 1][IF] + 1 : 1; word_num = inf_inst_ptr->prog_offset % words_per_line; order_of_fill = find_order_of_fill (word_num); for (i = 0; i < words_per_line; ++i) avail_time[i] = curr_time + delay_list[order_of_fill[i]]; /* when do I figure out the beginning cache line info to keep for this path? 1/28/96 */ if (verbose) printc ("curr time = %d\n", curr_time); /* if we're jumping to a miss, delay some more ! */ /* beware: delay cannot be negative. We may be printing some negative values here. 2/6/96 */ /* I'm adding this delay to delay after fm to avoid the underestimation to sum 2/29/96 */ /* but be careful - it does not apply if we are returning from child because there is another kind of delay for that 3/11/96 */ if (jumping && fm_last_word_loaded > 0 && inst_after_func != INSERT_FUNC) { if (verbose) printc ("delay after fm, jumping to a miss ??? %d\n", fm_last_word_loaded - curr_time + 1); diff = fm_last_word_loaded - curr_time + 1; poss_update (&diff, 0, "diff cannot be negative"); path_ptr->delay_after_fm += diff; if (verbose) printc ("That brings path's delay after fm to %d.\n", path_ptr->delay_after_fm); if (iterations_handled == 0) for (i = 0; i < words_per_line; ++i) avail_time[i] += extra_delay; } if (jumping && last_word_loaded > 0) { extra_delay = last_word_loaded - curr_time + 1; /* extra delay should not be negative */ poss_update (&extra_delay, 0, "extra delay can't be neg"); if (verbose) { printc ("Last word loaded at %d\n", last_word_loaded); printc ("extra delay is %d\n", extra_delay); } for (i = 0; i < words_per_line; ++i) { avail_time[i] += extra_delay; } } print_array (avail_time, 0, words_per_line, "load times"); /* find out when last word is loaded */ last_word_loaded = largest_entry (avail_time, 0, words_per_line); if (verbose) printc ("last word loaded at %d\n", last_word_loaded); assess_penalty = true; } #endif /* If no wrap-around fill, just assess const miss penalty. */ /* else *//*uncomment if removeing ifdef from wrap around */ { assess_penalty = true; } } else assess_penalty = false; if ((fh_status == HIT) && (ever_transition('m', 'i', stats_inst_ptr->cat_list, WORST))) { Add_Block_To_Cache_List(stats_inst_ptr->line_number, stats_inst_ptr->inst_number, node_ptr->instance, stats_inst_ptr->cat_list->best_case, stats_inst_ptr->cat_list->worst_case, stats_inst_ptr->cat_list, fh_list); } break; } case Hit_INST_CATEGORY : { /*HANDLING THE HIT INSTRUCTIONS... EASIEST IN THE LOT */ if (verbose) printc ("Hit \n"); /* something else from Time_Block (4/21) */ if (fm_status == MISS) Add_Block_To_Cache_List (stats_inst_ptr->line_number, stats_inst_ptr->inst_number, node_ptr->instance, stats_inst_ptr->cat_list->best_case, stats_inst_ptr->cat_list->worst_case, stats_inst_ptr->cat_list, fm_list); assess_penalty = false; break; } default : { error ("Unknown inst category"); } } } else /* for BEST case */ { switch (stats_inst_ptr->cat_list->best_case) { case Firstmiss_INST_CATEGORY : { if (verbose) printc ("First Miss \n"); /* no such thing as f->i in best case (6/6) */ in_cache = Inst_In_Cache(*fm_list, stats_inst_ptr->line_number); if (in_cache) { assess_penalty = false; } else { if (verbose) printc ("I just encountered a fm in Time_Path\n"); /* if f->f, assess penalty only on 1st iter: this is when the fm_status is miss; there is no extra time At higher level, we have to subtract the miss penalty on subsequent iter of outer loop (6/14) */ if (ever_transition('f','f',stats_inst_ptr->cat_list, tcase)) { if (verbose) printc ("We will have a f->f transition\n"); assess_penalty = (fm_status == MISS) ? true : false; } else { assess_penalty = (fm_status == MISS) ? true : false; } Add_Block_To_Cache_List(stats_inst_ptr->line_number, stats_inst_ptr->inst_number, node_ptr->instance, stats_inst_ptr->cat_list->best_case, stats_inst_ptr->cat_list->worst_case, stats_inst_ptr->cat_list, fm_list); } printf("ASSES PEN %d \n",assess_penalty); break; } case Firsthit_INST_CATEGORY : { if (verbose) printc ("First Hit \n"); if (fh_status == MISS) assess_penalty = true; else { if (Is_In_Block_List (inf_bptr->block_number, active_fh_list)) { assess_penalty = true; } else { assess_penalty = false; Append_Block_Blocklist (inf_bptr->block_number, &active_fh_list); } } printf("ASSES PEN %d \n",assess_penalty); break; } case Miss_INST_CATEGORY : { if (verbose) printc ("Miss \n"); /* no such thing as m->i in best case (6/6) */ same_cache_line = false; /* Checking for group miss. * If timing the 1st instruction in a block, and that * inst is a miss, then check to see if the last * instruction from the previous is in the same cache * line. If so, hen treat this instruction as a hit. * If the previous block represents a loop, then for * every predicate block (to the current block) that * is in the loop, the last instruction must be in the * same cache line. None of these last instructions * can be a function call. */ if (inf_inst_ptr == inf_bptr->instruct_list) { prev_block_ptr = Find_Prev_Path_Block(inf_bptr->block_number, path_ptr->path_list); if (prev_block_ptr && ! prev_block_ptr->is_loop_node) { prev_block_inf_ptr = Locate_Inf_Block(inf, prev_block_ptr->block_number); if (prev_block_inf_ptr) { pblock_linst = Find_Last_Inst_Num( prev_block_inf_ptr->instruct_list); pblock_linst_ptr = Locate_Stats_Inst( node_ptr->instruct_list, pblock_linst); if ((pblock_linst_ptr) && (Inst_Same_Cache_Line(stats_inst_ptr, pblock_linst_ptr)==true) && (strcmp(pblock_linst_ptr->func, "") == 0) ) same_cache_line = true; } } else if (prev_block_ptr) { pred_blocklist = Find_Pred_Blocklist( (block_element*)( node_ptr->inf_ptr ), inf_bptr->block_number ) ; inf_loop_ptr = Find_Loop(node_ptr->inf_ptr, prev_block_ptr->block_number); /* Create node name loop */ itoa (inf_loop_ptr->number, loop_number); strcpy(loop_name, Loop_PREFIX); strcat(loop_name, " "); strcat(loop_name, loop_number); same_cache_line = true; for (block_ptr = pred_blocklist; block_ptr && same_cache_line; block_ptr = block_ptr->next) { if (Is_In_Block_List(block_ptr->block_number, inf_loop_ptr->block_list)==true) { prev_inf_block_ptr = Locate_Inf_Block( node_ptr->inf_ptr, block_ptr->block_number); max_inst_num = Find_Max_Inst( prev_inf_block_ptr->instruct_list); if (max_inst_num >= 0) { cnode = Locate_Child_Node(loop_name, node_ptr->instance, node_ptr->child); inst_ptr2 = Locate_Stats_Inst(cnode->instruct_list, max_inst_num); if (inst_ptr2) /* cure segv sym 4/20/99 */ same_cache_line = ( (Inst_Same_Cache_Line( stats_inst_ptr, inst_ptr2) ) && (strcmp(inst_ptr2->func, "") == 0) ); } else same_cache_line = false; } } } } if (! same_cache_line) assess_penalty = true; else assess_penalty = false; assess_penalty = true; printf("ASSES PEN %d \n",assess_penalty); break; } case Hit_INST_CATEGORY : { if (verbose) printc ("Hit \n"); /* need to detect a future h->i (8/2) */ if (ever_transition ('h', 'i', stats_inst_ptr->cat_list, tcase)) { if (verbose) printc ("There will be a h->i transition in best case.\n"); } /* something else from Time_Block */ if (fm_status == MISS) Add_Block_To_Cache_List (stats_inst_ptr->line_number, stats_inst_ptr->inst_number, node_ptr->instance, stats_inst_ptr->cat_list->best_case, stats_inst_ptr->cat_list->worst_case, stats_inst_ptr->cat_list, fm_list); assess_penalty = false; printf("ASSES PEN %d \n",assess_penalty); break; } default : { error ("Unknown inst category"); } } } /*end of else for BEST CASE */ /* * If this inst is annulled branch, and we can tell that * 2 instructions from now we are falling through and not * taking the branch, then we need to annul result of next inst. * (11/15/96) */ /*annul code used to be here .. but it was commented out, so deleted. also the inst2annul is never defined... meaning all annul code is useless*/ /* 11. next, let's determine the correct data size, whether to use SINGLE_SIZE or DOUBLE_SIZE. For integer instructions, it doesn't matter. For most flops, we can look at inf_inst_ptr->data_type, execpt for conversions */ size = set_instr_size(inf_inst_ptr); /* 12. use a special variable that just holds the end cycles for the current instruction being added to the path modified 2/1 */ /*GETTING READY TO HANDLE THE OTHER PIPELINE STAGES, IF STAGE DONE SO FAR */ /* * treat this as a nop if we need to annul (11/15/96) */ /*BOOKMARK- no more annulled instructions...*/ /*FIND OUT WHAT ARE END PIPELINE CHARACTERISTICS FOR CURRENT INSTRUCTION */ for (i = 0; i < NUM_STAGES; ++i) this_inst_end_cycles[i] = inst_end_cycles[inf_inst_ptr -> inst_type][size][i][(int) tcase]; #ifdef __SIB_DEBUG__ /* Maybe check for "sbrs", etc. must be done here ??? */ /* if( sib_verbose ) { printc( " --- \n" ) ; printc( "Time_Path::inside FOR loop for instructions in basic block. 2 \n" ) ; printc( "BEFORE \n" ) ; printc( "Timing for instruction : %s\n", inf_inst_ptr->inst ) ; printc( "path_cycles = %d \n", path_cycles ) ; printc( "this_inst_end_cycles[IF] = %d \t this_inst_End_cycles[EX} = %d \n", this_inst_end_cycles[IF], this_inst_end_cycles[EX] ) ; printc( " --- \n" ) ; } */ #endif // __SIB_DEBUG__ /* Added by Sibin M on April 20, 2004. * Check to see if the current instructions number of cycles is variable. */ // check_update_variable_cycles( inf_inst_ptr->inst_type, this_inst_end_cycles, iterations_handled, max_iterations ) ; /* End of additions by Sibin M - April 20, 2004. */ #ifdef __SIB_DEBUG__ /* Maybe check for "sbrs", etc. must be done here ??? */ /* if( sib_verbose ) { printc( " --- \n" ) ; printc( "Time_Path::inside FOR loop for instructions in basic block. 2 \n" ) ; printc( "AFTER \n" ) ; printc( "Timing for instruction : %s\n", inf_inst_ptr->inst ) ; printc( "this_inst_end_cycles[IF] = %d \t this_inst_End_cycles[EX} = %d \n", this_inst_end_cycles[IF], this_inst_end_cycles[EX] ) ; printc( " --- \n" ) ; } */ #endif // __SIB_DEBUG__ for (i = 0; i < NUM_STAGES; ++i) { begin_use[num_path_inst][i] = 0; end_use[num_path_inst][i] = 0; } /* 13. look at the DC miss penalty (11/7) this makes the stages before the CA further back in time if this is a load or store */ data_access(inf_inst_ptr,this_inst_end_cycles,this_inst_beg_cycles, data_status,&dcache_misses, history +num_path_inst, prev_MEM_Miss,DC_MISS_LATENCY); /* 14. when we are wc naive, there is no pipeline overlap possible and all instructions are misses (6/16) */ if (naive_too) { path_ptr->wc_pc_nai_time += this_inst_end_cycles[IF] + delay_list[0]; path_ptr->wc_pipe_nai_time += this_inst_end_cycles[IF]; path_ptr->bc_pipe_nai_time += 1; if (verbose) { printc ("path's wc pipe nai time = %d\n", path_ptr->wc_pipe_nai_time); printc ("path's bc pipe nai time = %d\n", path_ptr->bc_pipe_nai_time); } } /* 15. for a fp load, we have to change course to finish in FWB instead of WB CA-times for loads and stores are not always the same. */ /*BOOKMARK- the FWB stage does not exit...anymore*/ /* 16. Establish the beginning cycles for this inst for step 10 */ /*CALCULATE THE BEGINING AND END SHAPE FOR CURRENT INSTRUCTIONS */ this_inst_beg_cycles[IF] = 1; for (prev = 0, stage = 1; stage < NUM_STAGES; ++stage) { if (this_inst_end_cycles[stage] == 0) { this_inst_beg_cycles[stage] = 0; continue; } this_inst_beg_cycles[stage] = this_inst_end_cycles[IF] - this_inst_end_cycles[prev] + 2; prev = stage; } /* 17. now find out about this instruction's destination register: initialize values of dest_ready and dest_reg for this instruction */ /*BOOKMARK- for now the instrucitons can produce dests in MEM and EX */ /*FINDING OUT IN WHICH STAGE THE OUTPUT OF INSTRUCTION IS AVAILABLE. NOTE THAT LOADS FINISH IN MEM, OTHERS IN EX, ONLY STORES AND BRANCHES DON'T PRODUCE RESULTS THAT CAN BE PUT IN REGISTERS. THIS IS NOT A BIG CODING PROBLEM SINCE ONLY LOADS CAN CAUSE DATA HAZRDS (RAW), REST ARE STRUCTURAL HAZARDS*/ dest_ready[num_path_inst] = dest_ready_stage(inf_inst_ptr, inst_data_type[num_path_inst]); /*BOOKMARK- POTENTIAL change that name of dest reg may be different ALL DEST REGS ARE IN op3*/ strcpy (dest_reg[num_path_inst], inf_inst_ptr -> op3.reg_list); /* 18. Find out about the src reg(s) too. (4/30) The src operand is needed at the END of which stage? ... */ /*BOOKMARK adding the concept of src3 reg for stores */ /*GET READY TO HANDLE DATA HAZARDS, THIS IS ARTIFACT OF THE TA AS IT WAS FOR THE SPARC ARCH.. NOW THAT WE USE PISA.. IT IS KINDA USELESS FOR EVERRYTHING OTHER THAN MEM OPERATIONS*/ set_src_needed(stage_src1_needed,stage_src2_needed,stage_src3_needed, num_path_inst, inf_inst_ptr); /* * * * * * * if this is the 1st inst in path * * * * * * */ /* 19. just initializing our path numbers (then go to step 28) */ /*ALL THE STUFF DONE SO FAR WAS MAINLY FOR IF STAGE AND TAKING CARE OF THE DATA HAZARDS. THIS IS COMMON FOR ALL INSTRUCTIONS. WE NOW DO ANALYSIS SEPARATELY FOR THE FIRST INSTR IN THE PATH AND THE OTHERS */ if (num_path_inst == 0) { /*MARKED - MORE WRAP AROUND STUFF NOT ANALYZED */ #ifdef TRASH if (wrap_around) { path_ptr->first_word_num = inf_inst_ptr->prog_offset % words_per_line; /* Identify those blocks in the loop this path is contained in. We know path_ptr->header_block. Go to inf->block->next... to find the header block of this loop and find its ->pred, ->pred->next... until you find a pred not in the loop. That will give you only the block #. Go back to inf->block->next... to find the instruct_list field. Look at its instructions starting from the end. Find the one that brings in the cache line that this path begins with. Fortunately, there is a prog_offset field, as in inf->block->next->next->instruct_list->prog_offset But next we need to know that pred inst categorization. Look at node_ptr->parent->instruct_list->cat_list or go to parent's parent if you need to find right inst #. 2/10/96 */ if (verbose){ printc ("Header block for loop is %d\n", path_ptr->header_block); printc ("Here are the blk #'s for this loop\n"); } /* * We are looking for an instruction in preheader block. */ pred_inst_cat = '?'; /* * Find out which loop we're in. */ loop_ele_ptr = Find_Loop (inf, path_ptr->header_block); /* * Print the block #'s for the blocks associated with this loop. */ for (blk_list_ptr = loop_ele_ptr->block_list; blk_list_ptr; blk_list_ptr = blk_list_ptr->next) { if (verbose) printc ("%d\n", blk_list_ptr->block_number); } /* * Look up the header block number in the inf data structure. */ blk_ele_ptr = Locate_Inf_Block(inf, path_ptr->header_block); if (verbose) printc ("according to inf, header block # is %d\n", blk_ele_ptr->block_number); /* * Examine the list of predecessor blocks, and find out which ones * are preheaders. We call Is_In_Block_List, which returns true * if the "pred" block is really contained in the loop. If it * returns false, that pred block is in fact a preheader. */ num_preheader_blks = 0; for (blk_list_ptr = blk_ele_ptr->pred; blk_list_ptr; blk_list_ptr = blk_list_ptr->next) { if (! Is_In_Block_List (blk_list_ptr->block_number, loop_ele_ptr->block_list)) { /* * Set pred_blk_num equal to the block number of this actual * predecessor block. Get out of the loop if we've already * seen a pred block. We only want to handle a situation where * there is a unique preheader. */ pred_blk_num = blk_list_ptr->block_number; ++num_preheader_blks; if (num_preheader_blks > 1) { if (verbose) printc ("HEY - there's more than 1 pred block. Stop now.\n"); break; } if (verbose) printc ("Pred block %d is not in loop\n", blk_list_ptr->block_number); /* * Look up this block number in the inf's list of blocks. * blk_ele_ptr will point to the predecessor block. */ blk_ele_ptr = Locate_Inf_Block(inf, pred_blk_num); /* * Now we want to look at the instructions in this preheader * block. Find out which ones are in the same program line as * the 1st instruction in the current path. */ for (instruct_ptr = blk_ele_ptr->instruct_list; instruct_ptr; instruct_ptr = instruct_ptr->next) { if (instruct_ptr->prog_offset / words_per_line == inf_inst_ptr->prog_offset / words_per_line) { /* * At this point we have identified an instruction in the * predecessor block as being in the same program line as * the 1st instruction in this path. instruct_ptr points * to it, and we will call its inst number pred_inst_num. */ pred_inst_num = instruct_ptr->number; if (verbose) printc ("%d (%d) from pred blk is in same prog line\n", instruct_ptr->number, instruct_ptr->prog_offset); /* * I don't know which node contains the pred instruction we * just identified, so look at the current node and all its * ancestors. */ for (tnode_ptr = node_ptr; tnode_ptr; tnode_ptr = tnode_ptr->parent) { found = 0; /* * For a particular node, see whether the pred inst is * in that node. */ for (inst_cat_ptr = tnode_ptr->instruct_list; inst_cat_ptr; inst_cat_ptr = inst_cat_ptr->next) { if (inst_cat_ptr->inst_number == pred_inst_num) { /* * We have located this instruction. No need to look * at any more ancestor nodes: that's why we set found * to 1. Now we find out whether this pred inst is * categorized (in wc) as a miss. */ found = 1; if (verbose) printc ("wc cat is %c\n", inst_cat_ptr->cat_list->worst_case); if (inst_cat_ptr->cat_list->worst_case == 'm' || inst_cat_ptr->cat_list->worst_case == 'i') { /* * Find which inst # this is, counting from the END. * Also, get the list of instructions beginning with * that miss inst, until the end of preheader blk. */ pred_inst_cat = inst_cat_ptr->cat_list->worst_case; backward_num = count_from_end (instruct_ptr); inst_word_list = get_inst_word_list (instruct_ptr); miss_word =instruct_ptr->prog_offset % words_per_line; curr_word =inf_inst_ptr->prog_offset % words_per_line; } break; } if (found) break; } } } } } } /* * At this point the pred miss inst may or may not be known * If it's not known, be pessimistic and assign a max_jumping_delay. * If it is known, we can calculate the delay now. 2/13/96 * * backward_num can range in value from 1 to words_per_line - 1. */ if (verbose){ printc ("** I found %d preheader block(s)\n", num_preheader_blks); if (num_preheader_blks > 0) { printc ("** pred miss inst categorized as %c\n", pred_inst_cat); printc ("** Counting from end of preheader, it's # %d\n", backward_num); printc ("** pred miss word %d ; path begins on word %d\n", miss_word, curr_word); print_array (inst_word_list,0,backward_num, "** preheader words"); } } if (num_preheader_blks == 1 && pred_inst_cat == 'm' && iterations_handled == 0) { /* * Determine the order of fill of this program line, which depends * on which word the miss word is. It's the first one loaded. */ order_of_fill = find_order_of_fill (miss_word); if (verbose) print_array(order_of_fill, 0, words_per_line, "** order of fill"); /* * delay_list[order_of_fill[curr_word]] represents when the curr * inst is loaded into cache. * delay_list[order_of_fill[inst_word_list[backward_num - 1]]] * represents when the inst imm preceding the curr inst is loaded * into cache. The backward_num-1 is used as the index into the * inst word list because backward_num tells how many instructions * there are from the miss inst to the last preheader inst * inclusive, starting with 1. Array indices must begin at 0. * * We find the difference between these 2 delay list values. If * they are just 1 apart, this means there is no delay, so that's * why we subtract 1 from the difference to get the delay. * Note that we are looking at the curr inst and the last inst in * the preheader, which happens to be the one imm before the * curr inst. */ diff = delay_list[order_of_fill[curr_word]] - 1 - delay_list[order_of_fill[inst_word_list[backward_num - 1]]]; poss_update (&diff, 0, "delay from pred miss can't be negative"); if (verbose) printc ("** delay from pred miss = %d\n", diff); path_ptr->first_line_delay += diff; already_calc_first_line_delay = YES; /* * We also need to assign values to the array avail time, so that * we can detect a delay due to a preheader occuring after the * 1st inst in this path. 2/15/96 */ for (i = curr_word + 1; i < words_per_line; ++i) { avail_time[i] = delay_list[order_of_fill[i]] - delay_list[order_of_fill[curr_word]] + 1; if (verbose) printc ("avail_time[%d] = %d\n", i, avail_time[i]); } } /* let's address situation like in triple where first inst in loop 1 delays 1st inst in loop 2 because they are both i's 3/6/96 */ else if (num_preheader_blks == 1 && pred_inst_cat == 'i' && stats_inst_ptr->cat_list->worst_case == 'i' && iterations_handled == 0) { if (verbose) printc ("WOW I'm an i and the pred is an i too!\n"); order_of_fill = find_order_of_fill (miss_word); print_array(order_of_fill, 0, words_per_line, "** order of fill"); diff = delay_list[order_of_fill[curr_word]] - 1 - delay_list[order_of_fill[inst_word_list[backward_num - 1]]]; poss_update (&diff, 0, "delay from pred miss can't be negative"); if (verbose) printc ("** delay from pred miss = %d\n", diff); i2i_delay = diff; /* what do I do with this number */ already_calc_first_line_delay = NO; } else { already_calc_first_line_delay = NO; } }/*IF (WRAP_AROUND) */ #endif /*SO FAR DOING ANALYSIS FOR THE FIRST INSTR IN THE PATH */ /*CALCULATING THE end_use. THIS IS THE IMPORTANT ARRAY */ for (i = 0; i < NUM_STAGES; ++i) { if (this_inst_end_cycles[i] > path_cycles) path_cycles = this_inst_end_cycles[i]; if (this_inst_end_cycles[i] != 0) end_use[num_path_inst][i] = path_cycles - this_inst_end_cycles[i] + 1; } begin_use[num_path_inst][IF] = 1; // print_array (end_use, 0, NUM_STAGES, "WOT"); /*MUST ADD THE CACHE MISS LATENCY IF MISS... PARAMTERIZATION CODE ADDED */ if ( (assess_penalty && overlap) || (assess_penalty && ! overlap && tcase == BEST) ) { if (tcase == WORST && cache_only) { path_ptr->wc_pipe_nai_time += delay_list[0]; if (verbose) printc ("path's wc pipe nai time = %d\n", path_ptr->wc_pipe_nai_time); } else if (tcase == BEST && cache_only) { path_ptr->bc_pipe_nai_time += delay_list[0]; if (verbose) printc ("path's bc pipe nai time = %d\n", path_ptr->bc_pipe_nai_time); } /*REMEMBER, THIS IS ICACHE MISS */ if (PARAMETERIZE) { path_cycles += 0;//IC_MISS_LATENCY; } else path_cycles += delay_list[0]; icache_misses ++; history[num_path_inst].icache_miss = true; for (stage = 0; stage < NUM_STAGES; stage++) { if (this_inst_end_cycles[stage] == 0) continue; if (PARAMETERIZE) { end_use[num_path_inst][stage] += 0;//IC_MISS_LATENCY; } else end_use[num_path_inst][stage] += delay_list[0]; if (stage > IF) { if (PARAMETERIZE) { begin_use[num_path_inst][stage] += 0;//IC_MISS_LATENCY; } else begin_use[num_path_inst][stage] += delay_list[0]; } } } /* i2i delay was calculated shortly above 3/6/96 */ /*MARKED - MORE WRAP AROUND CACHE STUFF */ #ifdef TRASH if (wrap_around && i2i_delay > 0) { for (stage = 0; stage < NUM_STAGES; ++stage) { if (this_inst_end_cycles[stage] == 0) continue; end_use[num_path_inst][stage] += i2i_delay; if (stage > IF) begin_use[num_path_inst][stage] += i2i_delay; } } #endif if (assess_penalty && ! overlap && tcase == WORST) { if (verbose) printc ("adding delay_list[0] (%d) to non-overlap\n", delay_list[0]); if (PARAMETERIZE) extra_nonoverlap += 1; else extra_nonoverlap += delay_list[0]; } /*MARKED - MORE WRAP AROUND CACHE STUFF */ #ifdef TRASH if (wrap_around) { /* If 1st inst is miss, load values of beg_avail_time for path * 2/1/96 */ if (assess_penalty) for (i = 0; i < words_per_line; ++i) { path_ptr->beg_avail_time[i] = avail_time[i]; } /* Store # of inst that stores the path 2/1/96 */ path_ptr->first_prog_offset = inf_inst_ptr->prog_offset % words_per_line; path_ptr->beg_word_num[0] = inf_inst_ptr->prog_offset % words_per_line; /* Since this is the 1st inst in path, we know what the first program line to be referenced is. 1/31/96 */ path_ptr->beg_cache_line = inf_inst_ptr->prog_offset / words_per_line; /* if 1st inst is a hit, be pessimistic and assume outer is bringing in a cache line we're waiting for 1/30/96 whether that cache line contains this inst or not. We assume we are jumping to a hit even though we may be in same cache line as were are coming from, which would have lead to a 3 instead of 4 cycle delay, but in here we can't tell 1/31/96. */ if (! assess_penalty && already_calc_first_line_delay == NO) { if (verbose) printc ("delay from being hit in 1st line = %d\n", max_jumping_delay); path_ptr->first_line_delay += max_jumping_delay; } /* if 1st inst is a miss, we could still be additionally delayed if the outer environment is loading a diff cache line when we enter 1/31/96 It is the outer's responsibility to detect how much of this pessimism to take away. Note we don't assess this delay for the 1st inst in entir prog. */ if (assess_penalty && ! (strcmp (node_ptr->name, "_main") == 0 && inf_inst_ptr->number == 0) ) { if (verbose) printc ("delay from being miss in 1st line = %d\n", max_jumping_delay); path_ptr->first_line_delay += max_jumping_delay; } } /* end of "if (wrap_around)" */ #endif /* also find out this instruction's source regs (5/11) */ /*NOW SETTING UP VARIABLES TO FIND IF THERE IS ANY DATA HAZARD */ if (*(inf_inst_ptr->op1.reg_list)) { src_needed[reg2int(inf_inst_ptr->op1.reg_list)] = end_use[num_path_inst][stage_src1_needed[num_path_inst]]; src_stage[reg2int(inf_inst_ptr->op1.reg_list)] = next_stage (stage_src1_needed[num_path_inst], this_inst_end_cycles); /* if this is the first time in the path that we need this register, store this info with the path */ if (path_ptr->src_needed[reg2int(inf_inst_ptr->op1.reg_list)] == NEVER) { path_ptr->src_needed[reg2int(inf_inst_ptr->op1.reg_list)] = src_needed[reg2int(inf_inst_ptr->op1.reg_list)]; path_ptr->src_stage [reg2int(inf_inst_ptr->op1.reg_list)] = src_stage [reg2int(inf_inst_ptr->op1.reg_list)]; } if (verbose){ printc ("\nMy src1 register is %s or %d\n", inf_inst_ptr->op1.reg_list, reg2int(inf_inst_ptr->op1.reg_list)); printc ("Value of %s is needed by %d before entering %s\n", inf_inst_ptr->op1.reg_list, src_needed[reg2int(inf_inst_ptr->op1.reg_list)], stage_name(src_stage[reg2int(inf_inst_ptr->op1.reg_list)])); } } if (*(inf_inst_ptr->op2.reg_list) && stage_src2_needed[num_path_inst] != NO_STAGE) { src_needed[reg2int(inf_inst_ptr->op2.reg_list)] = end_use[num_path_inst][stage_src2_needed[num_path_inst]]; src_stage[reg2int(inf_inst_ptr->op2.reg_list)] = next_stage (stage_src2_needed[num_path_inst], this_inst_end_cycles); /* again, if this is the first time the src reg needed in the path*/ if (path_ptr->src_needed[reg2int(inf_inst_ptr->op2.reg_list)] == NEVER) { path_ptr->src_needed[reg2int(inf_inst_ptr->op2.reg_list)] = src_needed[reg2int(inf_inst_ptr->op2.reg_list)]; path_ptr->src_stage [reg2int(inf_inst_ptr->op2.reg_list)] = src_stage [reg2int(inf_inst_ptr->op2.reg_list)]; } if (verbose) { printc ("\nMy src2 register is %s or %d\n", inf_inst_ptr->op2.reg_list, reg2int(inf_inst_ptr->op2.reg_list)); printc ("Value of %s is needed by %d before entering %s\n", inf_inst_ptr->op2.reg_list, src_needed[reg2int(inf_inst_ptr->op2.reg_list)], stage_name(src_stage[reg2int(inf_inst_ptr->op2.reg_list)])); } } /*BOOKMARK- add source reg finding nfo for op3 */ /*FIXIT- check implementation*/ if (*(inf_inst_ptr->op3.reg_list) && stage_src3_needed[num_path_inst] != NO_STAGE) { src_needed[reg2int(inf_inst_ptr->op3.reg_list)] = end_use[num_path_inst][stage_src3_needed[num_path_inst]]; src_stage[reg2int(inf_inst_ptr->op3.reg_list)] = next_stage (stage_src3_needed[num_path_inst], this_inst_end_cycles); /* again, if this is the first time the src reg needed in the path*/ if (path_ptr->src_needed[reg2int(inf_inst_ptr->op3.reg_list)] == NEVER) { path_ptr->src_needed[reg2int(inf_inst_ptr->op3.reg_list)] = src_needed[reg2int(inf_inst_ptr->op3.reg_list)]; path_ptr->src_stage [reg2int(inf_inst_ptr->op3.reg_list)] = src_stage [reg2int(inf_inst_ptr->op3.reg_list)]; } if (verbose){ printc ("\nMy src3 register is %s or %d\n", inf_inst_ptr->op3.reg_list, reg2int(inf_inst_ptr->op3.reg_list)); printc ("Value of %s is needed by %d before entering %s\n", inf_inst_ptr->op3.reg_list, src_needed[reg2int(inf_inst_ptr->op3.reg_list)], stage_name(src_stage[reg2int(inf_inst_ptr->op3.reg_list)])); } } prev_Mispredicted_branch = check_branch_misprediction(inf_inst_ptr, inst_num_head); } /* * * * * * * for instructions after the 1st one * * * * * * */ /* 20. Handle data hazards that occur as we resume with outer loop (2/27) */ else {/* instrs after first */ /* As long as we are still in the first program line in this path, * store the inst number with the path so that outer level will * be able to detect delays associated with inst after the first. * Currently, in this path, we give the max delay to 1st inst, but * outer level will need to know the inst numbers to know how much * delay actually exists when the child (this path) is finishing * a program line that began in outer level. 2/4/96 */ /*NOW WE HANDLE ALL THE INSTR IF IT IS NOT THE FIRST IN THE PATH */ /*MARKED - MORE WRAP AROUND STUFF */ #ifdef TRASH if (wrap_around && path_ptr->beg_cache_line == cache_line && num_path_inst < words_per_line) path_ptr->beg_word_num[num_path_inst] = inf_inst_ptr->prog_offset % words_per_line; #endif /* * Checking to see if... this is an exit path, this is * the second to last instruction, it's an annulled branch * instruction, and the 1st inst in path isn't fall thru */ if (inst_after_loop) {/* inst after inner loop or called function */ word_num = inf_inst_ptr->number % words_per_line; if (verbose) printc ("\n````````````````````````````````````````\n"); /* adjust inner_release_time according to path_cycles??? update the path's release_time array with values from the inner loop/function (5/22) */ /*IF THIS IS INSTRUCTION AFTER A LOOP ... WE USE THE INNER RELEASE TIME FOR ALL THE REGISTERS TO CHECK RELEASE TIME OF CURRENT PATH */ for (i = 0; i < NUMREGS; ++i) if (inner_release_time[i] != NEVER) inner_release_time[i] = path_cycles - inner_release_time[i]+1; for (i = 0; i < NUMREGS; ++i) { poss_update (&release_time[i], inner_release_time[i], ""); } if (verbose) { printc ("Here is the register situation as we resume the path\n"); print_reg_info (src_needed, src_stage, inner_release_time); printc ("list of completed inner loop's union end info\n"); for (stage = 0; stage < NUM_STAGES; stage++) { for (p = inner_end_head[stage]; p; p = p -> next) if (p->occupant) printc ("stage %d end-cycle %d occupant %d\n", stage, p -> cycles, p -> occupant -> number); } } /* ============================================================== */ /* slide the stages just as we did going into loop (started 6/25) */ /*IF REQUIRED MAKE CAHNGES TO THE PATH CYCLES */ old_path_cycles = path_cycles - path_end_cycles[IF] + 1; for (stage = 0; stage < NUM_STAGES; ++stage) poss_update (&old_path_cycles, path_cycles - path_end_cycles[stage] + 1, ""); if (verbose) { printc ("After the child, here is the end_use of the stages:\n"); for (stage = 0; stage < NUM_STAGES; ++stage) printc ("%3d ", (path_end_cycles[stage] != 0) ? (path_cycles - path_end_cycles[stage] + 1) : 0); printc ("\n"); } /* earliest start is 1 more than the finish time of each stage */ for (stage = 0; stage < NUM_STAGES; ++stage) if (path_end_cycles[stage] == 0) earliest_start[stage] = 0; else earliest_start[stage] = path_cycles - path_end_cycles[stage] +2; /*BOOKMARK- in anticipation of the overlapping icache and dcache miss overlaps, the earliest start will be set to the end of hte mem stage. this also takes care of the branch misprediction commented above*/ branch_penalty_after_loop(earliest_start); /* make adjustment based on this inst's own pipeline shape */ /*NOW CHANGING THE INSTRUCTIONS PIPELINE INFO IF REQD */ for (prev = 0, stage = 1; stage < NUM_STAGES; ++stage) { if (this_inst_beg_cycles[stage] == 0) { earliest_start[stage] = 0; continue; } poss_update (&earliest_start[stage], earliest_start[prev] + this_inst_beg_cycles[stage] - this_inst_beg_cycles[prev], "inst own beg shape"); prev = stage; } if (verbose){ printc ("before looking at data hazards, here's earliest start:\n"); print_array (earliest_start, 0, NUM_STAGES, ""); } /* figure when the instruction finishes (6/26) */ for (stage = 0; stage < NUM_STAGES; ++stage) { if (this_inst_end_cycles[stage] == 0) { inst_done[stage] = 0; continue; } inst_done[stage] = earliest_start[stage] + this_inst_end_cycles[IF] - this_inst_end_cycles[stage] + 1 - this_inst_beg_cycles[stage]; } /* inst should finish immediately before going on to next stage */ for (prev = 0, stage = 1; stage < NUM_STAGES; ++stage) { if (earliest_start[stage] == 0) continue; poss_update (&inst_done[prev], earliest_start[stage] - 1, ""); prev = stage; } /* if inst in same cache line as child was loading into cache, * inst after must be delayed 1/29/96 */ word_num = inf_inst_ptr->prog_offset % words_per_line; if (verbose){ printc ("current cache line = %d\n", cache_line); printc ("current word num = %d\n", word_num); printc ("inner end cache line = %d\n", inner_end_cache_line); print_array (inner_end_avail_time, 0, words_per_line, "inner end avail time"); print_array (inner_fm_end_avail_time, 0, words_per_line, "inner fm end avail time"); } /*MARKED - MORE WRAP AROUND STUFF */ #ifdef TRASH if (wrap_around) { max_inner_end_avail_time = largest_entry (inner_end_avail_time, 0, words_per_line); max_inner_fm_end_avail_time = largest_entry (inner_fm_end_avail_time, 0, words_per_line); } if (wrap_around && cache_line == inner_end_cache_line && inner_end_avail_time[word_num] > earliest_start[IF]) { for (i = 0; i < NUM_STAGES; ++i) { if (this_inst_end_cycles[i] != 0) { inst_done[i] += inner_end_avail_time[word_num] - earliest_start[IF]; } } if (verbose) printc ("returning from child, SAME LINE, delay = %d\n", inner_end_avail_time[word_num] - earliest_start[IF]); } /* if we are jumping to a diff cache line after coming out of child, delay based on when last word is written */ else if (wrap_around && cache_line != inner_end_cache_line){ if (max_inner_end_avail_time + 1 > earliest_start[IF]) { if (verbose){ print_array (earliest_start, 0, NUM_STAGES, "earliest_start"); print_array (inst_done, 0, NUM_STAGES, "inst done before adj"); } /* new method 3/20/96 */ inst_done[IF] += max_inner_end_avail_time - earliest_start[IF] + 1; for (prev = 0, stage = 1; stage < NUM_STAGES; ++stage) { if (earliest_start[stage] == 0) continue; earliest_start[stage] = inst_done[prev] + 1; poss_update (&inst_done[stage], earliest_start[stage], ""); prev = stage; } if (verbose) { print_array (earliest_start, 0, NUM_STAGES, "earliest_start"); print_array (inst_done, 0, NUM_STAGES, "inst done after adj"); printc ("returning from child, DIFF LINE, delay = %d\n", max_inner_end_avail_time - earliest_start[IF] + 1); } } if (max_inner_fm_end_avail_time > max_inner_end_avail_time) { diff = max_inner_fm_end_avail_time - earliest_start[0] + 1; poss_update (&diff, 0, "diff bet max inner fm end avail time & e_s[IF]"); path_ptr->delay_after_fm += diff; /* make a node in linked list for this fm that caused a delay here 3/18/96 */ memory_count += 12; delay_after_fm_temp = (struct delay_after_fm_node *) malloc (sizeof (struct delay_after_fm_node)); delay_after_fm_temp->inst = inner_last_fm; delay_after_fm_temp->cycles = diff; delay_after_fm_temp->next = delay_after_fm_head; delay_after_fm_head = delay_after_fm_temp; if (verbose){ printc ("Adding %d to delay after fm, ret from child\n", diff); printc ("caused by fm at instruction %d\n", inner_last_fm); } } } #endif /* if we brought in a new cache line towards end of child, let this path keep track of this most recently loaded line 1/31/96 */ /* Upon later reflection, I think it's only a good idea if inner end cache line == this cache line 2/5/96 */ /* Also include inner first line delay when updating end avail time 3/15/96 */ #ifdef TRASH if (wrap_around) { if (inner_end_cache_line == cache_line && inner_end_avail_time[0] > avail_time[0]) { for (i = 0; i < words_per_line; ++i) { avail_time[i] = inner_end_avail_time[i]; avail_time[i] += inner_first_line_delay; } } /* Also include inner first line delay when updating last word loaded 3/15/96 */ poss_update (&last_word_loaded, max_inner_end_avail_time + inner_first_line_delay, "child has greater max avail time"); } #endif /*MARKED - WRAP AROUND CACHE STUFF ENDS HERE....*/ /* the miss penalty, if applicable */ if (verbose) printc ("assess_penalty = %d\n", assess_penalty); if ( (assess_penalty && overlap) || (assess_penalty && ! overlap && tcase == BEST) ) { if (PARAMETERIZE) { inst_done[IF] += 0;// IC_MISS_LATENCY; } else inst_done[IF] += delay_list[0]; icache_misses ++; history[num_path_inst].icache_miss = true; for (stage = 1; stage < NUM_STAGES; ++stage) { if (this_inst_end_cycles[stage] == 0) continue; poss_update (&inst_done[stage], inst_done[IF] + this_inst_end_cycles[IF] - this_inst_end_cycles[stage], ""); } } if (assess_penalty && ! overlap && tcase == WORST) { if (verbose) printc ("adding delay_list[0] (%d) to non-overlap\n", delay_list[0]); if (PARAMETERIZE) extra_nonoverlap += 1; else extra_nonoverlap += delay_list[0]; } /* the stage start times should conform to inst's shape not be the earliest possible times for each stage */ for (stage = 1; stage < NUM_STAGES; ++stage) { if (earliest_start[stage] == 0) continue; prev = prev_stage (stage, this_inst_end_cycles); poss_update (&earliest_start[stage], inst_done[prev] + 1, "inst shape"); } /* find src_needed, update path ptr's src needed if necessary, so that we can find data hazard coming out of loop (7/6) */ /*NOW CHECKING FOR DATA HAZARDS FOR INSTR OTHER THAN FIRST *//*iamsam*/ if (*(inf_inst_ptr->op1.reg_list)) { src_needed[reg2int(inf_inst_ptr->op1.reg_list)] = inst_done[stage_src1_needed[num_path_inst]]; src_stage[reg2int(inf_inst_ptr->op1.reg_list)] = next_stage (stage_src1_needed[num_path_inst], this_inst_end_cycles); /* if this is the 1st time in the path that we need this register, store this info with the path */ if (path_ptr->src_needed[reg2int(inf_inst_ptr->op1.reg_list)] == NEVER) { path_ptr->src_needed[reg2int(inf_inst_ptr->op1.reg_list)] = src_needed[reg2int(inf_inst_ptr->op1.reg_list)]; path_ptr->src_stage [reg2int(inf_inst_ptr->op1.reg_list)] = src_stage [reg2int(inf_inst_ptr->op1.reg_list)]; } if (verbose){ printc ("My src1 register is %s or %d\n", inf_inst_ptr->op1.reg_list, reg2int(inf_inst_ptr->op1.reg_list)); printc ("Value of %s is needed by %d before entering %s\n", inf_inst_ptr->op1.reg_list, src_needed[reg2int(inf_inst_ptr->op1.reg_list)], stage_name(src_stage[reg2int(inf_inst_ptr->op1.reg_list)])); } diff = release_time[reg2int(inf_inst_ptr->op1.reg_list)] - src_needed[reg2int(inf_inst_ptr->op1.reg_list)]; if (diff > 0) { poss_update (&delay, diff, ""); if (verbose) printc ("DATA HAZARD: delay = %d cycles\n", diff); inst_done[stage_src1_needed[num_path_inst]] += diff; for (stage = src_stage[reg2int(inf_inst_ptr->op1.reg_list)]; stage < NUM_STAGES; ++stage) { if (earliest_start[stage] == 0) continue; earliest_start[stage] += diff; inst_done[stage] += diff; } } } if (*(inf_inst_ptr->op2.reg_list) && stage_src2_needed[num_path_inst] != NO_STAGE) {/**/ src_needed[reg2int(inf_inst_ptr->op2.reg_list)] = inst_done[stage_src2_needed[num_path_inst]]; src_stage[reg2int(inf_inst_ptr->op2.reg_list)] = next_stage (stage_src2_needed[num_path_inst], this_inst_end_cycles); /* again, if this is the 1st time the src reg needed in the path*/ if (path_ptr->src_needed[reg2int(inf_inst_ptr->op2.reg_list)] == NEVER) { path_ptr->src_needed[reg2int(inf_inst_ptr->op2.reg_list)] = src_needed[reg2int(inf_inst_ptr->op2.reg_list)]; path_ptr->src_stage [reg2int(inf_inst_ptr->op2.reg_list)] = src_stage [reg2int(inf_inst_ptr->op2.reg_list)]; } if (verbose) { printc ("My src2 register is %s or %d\n", inf_inst_ptr->op2.reg_list, reg2int(inf_inst_ptr->op2.reg_list)); printc ("Value of %s is needed by %d before entering %s\n", inf_inst_ptr->op2.reg_list, src_needed[reg2int(inf_inst_ptr->op2.reg_list)], stage_name(src_stage[reg2int(inf_inst_ptr->op2.reg_list)])); } diff = release_time[reg2int(inf_inst_ptr->op2.reg_list)] - src_needed[reg2int(inf_inst_ptr->op2.reg_list)]; if (diff > 0) { poss_update (&delay, diff, ""); if (verbose) printc ("DATA HAZARD: delay = %d cycles\n", diff); inst_done[stage_src2_needed[num_path_inst]] += diff; for (stage = src_stage[reg2int(inf_inst_ptr->op2.reg_list)]; stage < NUM_STAGES; ++stage) { if (earliest_start[stage] == 0) continue; earliest_start[stage] += diff; inst_done[stage] += diff; } } } /*BOOKMARK- add op3 info..*/ /*FIXIT- check implementation*/ if (*(inf_inst_ptr->op3.reg_list) && stage_src3_needed[num_path_inst] != NO_STAGE) { src_needed[reg2int(inf_inst_ptr->op3.reg_list)] = inst_done[stage_src3_needed[num_path_inst]]; src_stage[reg2int(inf_inst_ptr->op3.reg_list)] = next_stage (stage_src3_needed[num_path_inst], this_inst_end_cycles); /* again, if this is the 1st time the src reg needed in the path*/ if (path_ptr->src_needed[reg2int(inf_inst_ptr->op3.reg_list)] == NEVER) { path_ptr->src_needed[reg2int(inf_inst_ptr->op3.reg_list)] = src_needed[reg2int(inf_inst_ptr->op3.reg_list)]; path_ptr->src_stage [reg2int(inf_inst_ptr->op3.reg_list)] = src_stage [reg2int(inf_inst_ptr->op3.reg_list)]; } if (verbose) { printc ("My src3 register is %s or %d\n", inf_inst_ptr->op3.reg_list, reg2int(inf_inst_ptr->op3.reg_list)); printc ("Value of %s is needed by %d before entering %s\n", inf_inst_ptr->op3.reg_list, src_needed[reg2int(inf_inst_ptr->op3.reg_list)], stage_name(src_stage[reg2int(inf_inst_ptr->op3.reg_list)])); } diff = release_time[reg2int(inf_inst_ptr->op3.reg_list)] - src_needed[reg2int(inf_inst_ptr->op3.reg_list)]; if (diff > 0) { poss_update (&delay, diff, ""); if (verbose) printc ("DATA HAZARD: delay = %d cycles\n", diff); inst_done[stage_src3_needed[num_path_inst]] += diff; for (stage = src_stage[reg2int(inf_inst_ptr->op3.reg_list)]; stage < NUM_STAGES; ++stage) { if (earliest_start[stage] == 0) continue; earliest_start[stage] += diff; inst_done[stage] += diff; } } } if (delay >0) history[num_path_inst].is_stalled = true; /*THIS IS WHERE I CAN ADD THE CODE TO COUNT THE NUMBER OF OVERLAPS IN THE PATH WHEN A FLOP FOLLOWS A MEM MISS */ /*THIS PLACE CAN ALSO BE USED TO CHECK OUT IF THIS IS A BRANCH INSTR AND ADDING THE BRANCH MISPREDICTION */ prev_Mispredicted_branch = check_branch_misprediction(inf_inst_ptr, inst_num_head); /*NEGATE THE MISPREDICTION IF THE PREVIOUS INSTR WAS D$ MISS */ /*WHY SHOULD WE NEGATE BRANCH MISPREDICTION WHICH OVERLAPS DACACHE MISS*/ //#if 1 if (history[num_path_inst-1].is_mem_op && history[num_path_inst-1].dcache_miss && !history[num_path_inst-1].negate) { prev_Mispredicted_branch = false; printf("NEGATION\n"); } //#endif /* path_cycles should be the latest finish time of this inst */ new_path_cycles = inst_done[IF]; for (stage = 1; stage < NUM_STAGES; ++stage) poss_update (&new_path_cycles, inst_done[stage], ""); /* If inst we are returning to is a miss, then we have to update * the avail times of all words in cache line to take into * account time of child (advance their times) 2/5/96 */ /*MARKED - WRAP AROUND STUFF */ #ifdef TRASH if (wrap_around && assess_penalty) { /* I think it's better to have temp store the avail time of * the current word in the program line, not 0. 2/25/96 */ word_num = inf_inst_ptr->prog_offset % words_per_line; temp = avail_time[word_num]; /* place to hold old value */ for (i = 0; i < words_per_line; ++i) avail_time[i] += inst_done[0] - temp; /* something I forgot: 2/26/96 */ poss_update (&last_word_loaded, largest_entry (avail_time, 0, words_per_line), "last word loaded"); if (verbose){ printc ("word num = %d\n", word_num); print_array (avail_time, 0, words_per_line, "updated avail times"); } } #endif if (verbose){ print_array (earliest_start, 0, NUM_STAGES, "inst start"); print_array (inst_done, 0, NUM_STAGES, "inst done "); printc ("Old path cycles = %d\nnew path cycles = %d\n", old_path_cycles, new_path_cycles); } /* mimic what should happen and continue to next inst */ path_cycles = new_path_cycles; if (compute_pd) { path_pipeline[num_path_inst] = inf_inst_ptr -> number; } /* * let's add the child's inner_first_line_delay to the path's * pipeline time, and not accumulate it as more of an inner 1st * line delay. 2/19/96 */ /*MARKED - WRAP AROUND STUFF */ #ifdef TRASH if (wrap_around && inner_first_line_delay > 0 && verbose) printc ("adding %d from inner 1st line delay\n", inner_first_line_delay); /* this loop is to help summatrix. But it inc error for triple3 from 23 to 24, for sum from 16 to 22. 3/11/96 Big programs get it big: for summatrix 57538 to 107540 over...*/ /*MARKED - WRAP AROUND STUFF */ if (wrap_around) for (i = 0; i < words_per_line; ++i) avail_time[i] += inner_first_line_delay; #endif /*WE NOW CALCULATE THE IMPORTANT VARIABLES THAT LET US SEE THE PIPLELINE SHPAE OF THE INSTRUCTIOSN */ for (stage = 0; stage < NUM_STAGES; ++stage) { if (this_inst_end_cycles[stage] != NEVER) { begin_use[num_path_inst][stage] = earliest_start[stage] + inner_first_line_delay; end_use[num_path_inst][stage] = inst_done[stage] + inner_first_line_delay; path_end_cycles[stage] = path_cycles - end_use[num_path_inst][stage] + 1; end_occupant[stage] = inf_inst_ptr; if (path_beg_cycles[stage] == 0) path_beg_cycles[stage] = begin_use[num_path_inst][stage]; } } inst_after_loop = 0; inst_after_func = NO_FUNC; /* Double loads take 2 cycles in CA (5/19) The third test is not to add the second cycle if there was a structural hazard with a flop who had to go to FWB first. The fourth test is to make sure we don't add a cycle if the inst is already in CA long enough. (7/2) */ /*CODE REMOVAL*/ /* * In case this is a load, and also the inst in delay slot * annulled branch that is not taken, it must behave like * regular int instruction. (11/18/96) */ /*PRINTING OUT INFO ABOUT DESTINATION REGISTER */ if (*dest_reg[num_path_inst]) { release_time[reg2int(dest_reg[num_path_inst])] = end_use[num_path_inst][dest_ready[num_path_inst]]; if (verbose){ printc ("\nThis inst dest register is %s or %d\n", dest_reg[num_path_inst], reg2int(dest_reg[num_path_inst])); printc ("Dest register busy until %d\n", release_time[reg2int(dest_reg[num_path_inst])]); } } if (verbose) { global_info (' '); print_path_fields (path_ptr, tcase); } /* At this point we don't need to know about child's ending cache line for the rest of the path 1/30/96 */ inner_end_cache_line = -1; continue; /* this inst is finished */ }/*if (inst_after_loop) */ /* ============================================================== */ /* 21. This inst will finish IF at the same time the previous instruction finished its ID. We also assume that this instruction will finish its ID 1 cycle later, which may soon change. If prev inst only fetched, our IF end time is 1 cycle later than prev's */ /*NOW LOOKING AT INSTRS THAT ARE NORMAL */ else { /* NOT inst after loop or function */ if (done[0] != 0 && verbose) print_array (done, 0, NUM_STAGES, "stages are done"); next = next_stage (IF, this_inst_end_cycles); if (next == NO_STAGE || end_use[num_path_inst - 1][next] == 0) end_use[num_path_inst][IF] = end_use[num_path_inst - 1][IF] + 1; else end_use[num_path_inst][IF] = end_use[num_path_inst - 1][next]; begin_use[num_path_inst][IF] = end_use[num_path_inst - 1][IF] + 1; } /* the miss penalty, if applicable */ #ifdef TRASH if (! wrap_around) extra_delay = 0; #endif if ( (assess_penalty && overlap ) || (assess_penalty && ! overlap && tcase == BEST) ) { /*BOOKMARK- CODE RED - SHOULD I INCREMENT ICACHE MISSES HERE*/ /*RIGHT NOW I GUESS YES */ if (PARAMETERIZE ) { end_use[num_path_inst][IF] = begin_use[num_path_inst][IF] + extra_delay +0;// IC_MISS_LATENCY; } else end_use[num_path_inst][IF] = begin_use[num_path_inst][IF] + delay_list[0] + extra_delay; icache_misses ++; history[num_path_inst].icache_miss = true; /* * In cache only, we also have to add the miss penalty * for instructions after the first too. 5/15/99 */ if (cache_only) { path_ptr->wc_pipe_nai_time += delay_list[0]; path_ptr->bc_pipe_nai_time += delay_list[0]; if (verbose){ printc ("path's wc pipe nai time = %d\n", path_ptr->wc_pipe_nai_time); printc ("path's bc pipe nai time = %d\n", path_ptr->bc_pipe_nai_time); } } } /*BOOKMARK- THIS MIGHT BE A GOOD PLACE TO ADD THE EXTRA DELAY CYCLES CAUSED BY I$ MISS AND D$ MISS OVERLAP */ /*FIXIT- verify and improve code below*/ #if 0 if (!PARAMETERIZE) { k = num_path_inst; for (i=0; i < 4 && k >0;i++,k--) { if (history[k-1].is_stalled) i++; } for (; k < num_path_inst;k++) { if (history[k].is_mem_op && history[k].dcache_miss && history[num_path_inst].icache_miss) { if (begin_use[k][MEM] < end_use[num_path_inst][IF]) { printf("ACTUALLY HAPPENS \n"); l = MEM; for (j=k; j prog_offset % words_per_line; curr_time = (num_path_inst > 0) ? end_use[num_path_inst - 1][IF] + 1: 1; if (verbose) { printc ("curr time = %d\n", curr_time); printc ("this inst available at %d\n", avail_time[word_num]); } if (avail_time[word_num] > curr_time) { if (jumping) { if (verbose) printc ("let's add %d to end use IF\n", i2f_jump_delay); end_use[num_path_inst][IF] = begin_use[num_path_inst][IF] + i2f_jump_delay; } else { end_use[num_path_inst][IF] = begin_use[num_path_inst][IF] + avail_time[word_num] - curr_time; if (verbose) printc ("inst delayed by %d", avail_time[word_num] - curr_time); } } if (fm_avail_time[word_num] > curr_time && iterations_handled == 0) /* new way 2/6/96 */ { if (verbose) { printc ("delay after fm ??? %d\n", fm_avail_time[word_num] - curr_time); printc ("last fm f2f = %d\n", last_fm_f2f); } /* this isn't accurate. we need to look at the difference between adjacent entries in fm avail time, because the curr time is not being incremented. 3/6/96 Note, for example, that in triple's 1st iter of loop 2, this cumulative delays for the fm 16 is 12 cycles, but it really should be 3( 1 for 18, 1 for 20 and 1 for 22) */ diff = fm_avail_time[word_num] - curr_time; poss_update (&diff, 0, "diff cannot be negative"); end_use[num_path_inst][IF] = begin_use[num_path_inst][IF]; /* wrong? 3/3/96*/ if (last_fm_f2f) { if (pending_dead_cycles == 0) path_ptr->fm_dead_cycles += diff; else if (diff > 1) path_ptr->fm_dead_cycles += diff - 1; pending_dead_cycles += diff; if (verbose) { printc ("pending dead cycles = %d\n", pending_dead_cycles); printc ("path's fm dead cycles = %d\n", path_ptr->fm_dead_cycles); } } else { end_use[num_path_inst][IF] += diff; } } } #endif /* on the side, it may be necessary to delay because of a recent first miss in this path. Calculate delay in same way, but look at the fm_avail_time. I'm talking about jumping to another program line in this path. 2/6/96 */ /* inst can't finish IF before the prev inst finishes ID (5/15) */ update_end_use(end_use, num_path_inst,this_inst_end_cycles); /* 22. if I'm a flop, then I leave the ID stage the same time as the previous flop (whenever that was) leaves FPEX, or end IF + 1, WHICHEVER IS LATER. If there was no previous flop, leave ID 1 cycle after leaving IF The same reasoning holds for an integer instruction. */ /* 23. Identify this instruction's source register(s) and find out at which cycle they are needed before continuing into the next stage. (4/30) NOTE: The delay due to a data hazard may coincide with a structural hazard. */ /*NOW WE FIND OUT SOURCE REGISTER INFORMATION... TO CHECK FOR DATA HAZARDS */ /* it may be helpful to know the end-use time of EX for the case of handling a store inst waiting for a value to store (5/13) */ initialize_end_use(end_use, num_path_inst,this_inst_end_cycles); /*this is where the data haz code was removed from */ /*CODE REMOVAL*/ /* 24. end_use ID may be later if we're waiting for a cond_code to be set We look back to the last inst in which the cond code was set, as long as it was the same data size inst, and we finish ID when the compare finishes the EX/FPEX */ /*SO FAR END USE HAS THE IF STAGE AND EX STAGE INFO .. WE HAVE TO DERIVE THE REST */ add_misprediction_penalty(prev_Mispredicted_branch, end_use,num_path_inst); /*BOOKMARK- If instruction is branch check if mispredicted and setup delay for following instrs*/ /*SETTING UPT HE STAGE FOR A BRANCH MISPREDICTION IF THIS INSTR IS A MISPREDICTED BRANCH */ prev_Mispredicted_branch = check_branch_misprediction(inf_inst_ptr, inst_num_head); /*NEGATE THE MISPREDICTION IF THE PREVIOUS INSTR WAS D$ MISS */ //#if 1 if (history[num_path_inst-1].is_mem_op && history[num_path_inst-1].dcache_miss && !history[num_path_inst-1].negate){ prev_Mispredicted_branch = false; } //#endif /*function below has 24,25 ,26 */ calculate_end_use(this_inst_end_cycles, end_use,begin_use, num_path_inst, only_fetch, inst_cond_code, &cc_set, inst_data_type, inf_inst_ptr, size,tcase); /* 27. Now add this delay to the end_use of the latter stages (5/22) */ /*ADDING DATA HAZARD DELAY TO THE END USE */ /*MARKED- CHECK IF THE DATA HAZARD DELAY HAS MEMORY COMPONENT.. SEEMS UNLIKELY*/ /*BOOKMARK- IT MIGHT BE BEST TO MOVE THE DATA HAZARD CHECKING CODE BELOW*/ delay = 0; if (!only_fetch && *(inf_inst_ptr->op1.reg_list)) { src_needed[reg2int(inf_inst_ptr->op1.reg_list)] = end_use[num_path_inst][stage_src1_needed[num_path_inst]]; src_stage[reg2int(inf_inst_ptr->op1.reg_list)] = next_stage (stage_src1_needed[num_path_inst], this_inst_end_cycles); /* if this is the first time in the path that we need this register, store this info with the path */ if (path_ptr->src_needed[reg2int(inf_inst_ptr->op1.reg_list)] == NEVER) { path_ptr->src_needed[reg2int(inf_inst_ptr->op1.reg_list)] = src_needed[reg2int(inf_inst_ptr->op1.reg_list)]; path_ptr->src_stage [reg2int(inf_inst_ptr->op1.reg_list)] = src_stage [reg2int(inf_inst_ptr->op1.reg_list)]; } if (verbose) { printc ("\nMy src1 register is %s or %d\n", inf_inst_ptr->op1.reg_list, reg2int(inf_inst_ptr->op1.reg_list)); printc ("Value of %s is needed by %d before entering %s\n", inf_inst_ptr->op1.reg_list, src_needed[reg2int(inf_inst_ptr->op1.reg_list)], stage_name(src_stage[reg2int(inf_inst_ptr->op1.reg_list)])); } diff = release_time[reg2int(inf_inst_ptr->op1.reg_list)] - src_needed[reg2int(inf_inst_ptr->op1.reg_list)]; if (diff > 0 && release_time[reg2int(inf_inst_ptr->op1.reg_list)] != NEVER && src_needed[reg2int(inf_inst_ptr->op1.reg_list)] != NEVER) { poss_update (&delay, diff, ""); if (verbose) printc ("DATA HAZARD: delay = %d cycles\n", diff); } } if (! only_fetch && *(inf_inst_ptr->op2.reg_list) && stage_src2_needed[num_path_inst] != NO_STAGE) { src_needed[reg2int(inf_inst_ptr->op2.reg_list)] = end_use[num_path_inst][stage_src2_needed[num_path_inst]]; src_stage[reg2int(inf_inst_ptr->op2.reg_list)] = next_stage (stage_src2_needed[num_path_inst], this_inst_end_cycles); /* again, if this is the first time the src reg needed in the path*/ if (path_ptr->src_needed[reg2int(inf_inst_ptr->op2.reg_list)] == NEVER) { path_ptr->src_needed[reg2int(inf_inst_ptr->op2.reg_list)] = src_needed[reg2int(inf_inst_ptr->op2.reg_list)]; path_ptr->src_stage [reg2int(inf_inst_ptr->op2.reg_list)] = src_stage [reg2int(inf_inst_ptr->op2.reg_list)]; } if (verbose){ printc ("\nMy src2 register is %s or %d\n", inf_inst_ptr->op2.reg_list, reg2int(inf_inst_ptr->op2.reg_list)); printc ("Value of %s is needed by %d before entering %s\n", inf_inst_ptr->op2.reg_list, src_needed[reg2int(inf_inst_ptr->op2.reg_list)], stage_name(src_stage[reg2int(inf_inst_ptr->op2.reg_list)])); } diff = release_time[reg2int(inf_inst_ptr->op2.reg_list)] - src_needed[reg2int(inf_inst_ptr->op2.reg_list)]; if (diff > 0 && release_time[reg2int(inf_inst_ptr->op2.reg_list)] != NEVER && src_needed[reg2int(inf_inst_ptr->op2.reg_list)] != NEVER) { /* if a store waits on both its operands, it first has to wait in ID and again in EX (7/5) */ /*DON'T HAVE TO ADD THE DIFF TO THE END USE CAUSE ONLY THE LARGER ONE WILL BE REQUIRED TO BE ADDED TO THE IS LATER */ /* if (delay > 0 && diff > 0) { if (verbose) printc ("TWO data hazards on same inst\n"); if (is_store_inst (inf_inst_ptr->inst)) end_use[num_path_inst][ID] += diff; } */ poss_update (&delay, diff, ""); if (verbose) printc ("DATA HAZARD: delay = %d cycles\n", diff); } } /*BOOKMARK- added the op3 stuff */ /*FIXIT- check implementation*/ if (! only_fetch && *(inf_inst_ptr->op3.reg_list) && stage_src3_needed[num_path_inst] != NO_STAGE) { // if (next_stage (stage_src3_needed[num_path_inst], this_inst_end_cycles)==EX) src_needed[reg2int(inf_inst_ptr->op3.reg_list)] = end_use[num_path_inst][stage_src2_needed[num_path_inst]]; /*else src_needed[reg2int(inf_inst_ptr->op3.reg_list)] = end_use[num_path_inst][EX] + (inst_end_cycles[inf_inst_ptr->inst_type][size][IS][(int) tcase] -inst_end_cycles[inf_inst_ptr->inst_type][size][EX][(int)tcase]) ; */ src_stage[reg2int(inf_inst_ptr->op3.reg_list)] = next_stage (stage_src3_needed[num_path_inst], this_inst_end_cycles); /* again, if this is the first time the src reg needed in the path*/ if (path_ptr->src_needed[reg2int(inf_inst_ptr->op3.reg_list)] == NEVER) { path_ptr->src_needed[reg2int(inf_inst_ptr->op3.reg_list)] = src_needed[reg2int(inf_inst_ptr->op3.reg_list)]; path_ptr->src_stage [reg2int(inf_inst_ptr->op3.reg_list)] = src_stage [reg2int(inf_inst_ptr->op3.reg_list)]; } if (verbose){ printc ("\nMy src3 register is %s or %d\n", inf_inst_ptr->op3.reg_list, reg2int(inf_inst_ptr->op3.reg_list)); printc ("Value of %s is needed by %d before entering %s\n", inf_inst_ptr->op3.reg_list, src_needed[reg2int(inf_inst_ptr->op3.reg_list)], stage_name(src_stage[reg2int(inf_inst_ptr->op3.reg_list)])); } diff = release_time[reg2int(inf_inst_ptr->op3.reg_list)] - src_needed[reg2int(inf_inst_ptr->op3.reg_list)]; if (diff > 0 && release_time[reg2int(inf_inst_ptr->op3.reg_list)] != NEVER && src_needed[reg2int(inf_inst_ptr->op3.reg_list)] != NEVER) { /* if a store waits on both its operands, it first has to wait in ID and again in EX (7/5) */ /*FOR STORES, IF THE THIRD REG IS NOT AVAILABLE, THE DELAY MUST BE IN THE EX STAGE .DOING HTAT NOW*/ end_use[num_path_inst][stage_src3_needed[num_path_inst]] += diff; if (verbose) printc ("DATA HAZARD: delay = %d cycles\n", diff); } } if (delay >0) history[num_path_inst].is_stalled = true; add_data_hazard_penalty(delay, this_inst_end_cycles, end_use[num_path_inst], inf_inst_ptr); /* in the case of a st victim, we give the full penalty to the EX stage, and the CA stage follows normally (7/25) */ if (this_inst_end_cycles[i] != 0 && delay > 0) { /* we have to be careful only to change the path ptr's src needed if is the same as this instruction's. Otherwise it was needed by an earlier instruction and must not be incremented. (7/5) */ if (*(inf_inst_ptr->op1.reg_list)) { if (src_needed[reg2int(inf_inst_ptr->op1.reg_list)] == path_ptr->src_needed[reg2int(inf_inst_ptr->op1.reg_list)]) { path_ptr->src_needed[reg2int(inf_inst_ptr->op1.reg_list)] += delay; } src_needed[reg2int(inf_inst_ptr->op1.reg_list)] += delay; } if (*(inf_inst_ptr->op2.reg_list)) { if (src_needed[reg2int(inf_inst_ptr->op2.reg_list)] == path_ptr->src_needed[reg2int(inf_inst_ptr->op2.reg_list)]) { path_ptr->src_needed[reg2int(inf_inst_ptr->op2.reg_list)] += delay; } src_needed[reg2int(inf_inst_ptr->op2.reg_list)] += delay; } /*BOOKMARK- more op3 stuff added */ /*FIXIT- check implementation*/ if (*(inf_inst_ptr->op3.reg_list)) { if (src_needed[reg2int(inf_inst_ptr->op3.reg_list)] == path_ptr->src_needed[reg2int(inf_inst_ptr->op3.reg_list)]) { path_ptr->src_needed[reg2int(inf_inst_ptr->op3.reg_list)] += delay; } src_needed[reg2int(inf_inst_ptr->op3.reg_list)] += delay; } } } /* 28. Double loads take 2 cycles in CA (5/19) The third test is not to add the second cycle if there was a structural hazard with a flop who had to go to FWB first */ /*BOOKMARK- double loads take same time...*/ /*ALL MEM OPERATIONS ARE CREATED EQUAL IN PISA */ #if 0 if (!only_fetch && is_load_inst (inf_inst_ptr->inst) && inf_inst_ptr->data_type == 32 && end_use[num_path_inst][CA] == end_use[num_path_inst][EX] + 1) { ++end_use[num_path_inst][CA]; ++begin_use[num_path_inst][FWB]; ++end_use[num_path_inst][FWB]; if (naive_too) { ++path_ptr->wc_pc_nai_time; ++path_ptr->wc_pipe_nai_time; printc ("path's pipe nai time = %d\n", path_ptr->wc_pipe_nai_time); } } /* 29. Double stores take 3 cycles in CA (5/21) */ if (!only_fetch && is_store_inst (inf_inst_ptr->inst) && inf_inst_ptr->data_type == 32) { ++end_use[num_path_inst][CA]; if (naive_too) { ++path_ptr->wc_pc_nai_time; ++path_ptr->wc_pipe_nai_time; printc ("path's pipe nai time = %d\n", path_ptr->wc_pipe_nai_time); } } #endif /* 30. Data hazards can occur when a register is written to by an instruction which is not an end occupant. For this reason, we need to look at the "times" when registers are written to and needed, as we did in the simulator. Now that we know the end_use time for this instruction, we can store this value as the "release time" of the register dest. (4/29) Release time is the time when the register becomes available as a potential source operand. */ if (!only_fetch && *dest_reg[num_path_inst]) { release_time[reg2int(dest_reg[num_path_inst])] = end_use[num_path_inst][dest_ready[num_path_inst]]; if (verbose){ printc ("\nThis inst dest register is %s or %d\n", dest_reg[num_path_inst], reg2int(dest_reg[num_path_inst])); printc ("Dest register busy until %d\n", release_time[reg2int(dest_reg[num_path_inst])]); } } /* 31. after end_use determined, find begin_use... only begin use IF has a different definition depending on whether this is the 1st instruction in the path (set in step 10) */ calculate_begin_use(begin_use[num_path_inst],end_use[num_path_inst], this_inst_end_cycles); /* * make adjustments if done[] is larger than begin_use for any stage * (4/19/96 */ if (verbose) print_array (begin_use[num_path_inst], 0, NUM_STAGES, "begin use"); diff = 0; for (stage = 0; stage < NUM_STAGES; ++stage) { if (begin_use[num_path_inst][stage] != 0 && done[stage] > begin_use[num_path_inst][stage]) poss_update (&diff, done[stage]-begin_use[num_path_inst][stage] + 1, "done diff"); } if (tcase == WORST && diff > 0) { for (stage = 0; stage < NUM_STAGES; ++stage) { if (begin_use[num_path_inst][stage] != 0) begin_use[num_path_inst][stage] += diff; if (end_use[num_path_inst][stage] != 0) end_use[num_path_inst][stage] += diff; } } for (stage = 0; stage < NUM_STAGES; ++stage) done[stage] = 0; /* 32. Keep track of the first time cc needed and last time used (5/16) This info is used when this path is examined at a higher level. */ /*ALSO KEEP TRACK OF CONDITION CODE REGISTERS FOR INT AND FLOPS. THESE ARE USED TO FIND DATA HAZARDS */ calculate_cc_reg(&inst_cond_code[num_path_inst], &inst_data_type[num_path_inst], &path_ptr->src_needed[INT_CC], end_use[num_path_inst], &release_time[INT_CC]); /* 33. now determine the last time someone finishes writing back so we can calculate the path_cycles up to this instruction */ /*BOOKMARK- no more FWB*/ if (compute_pd) path_pipeline[num_path_inst] = inf_inst_ptr -> number; /*MARKED - BELOW I DO NOT UNDERSTAND WHY THE IF ELSE WAS AS BEFORE IN THE OLD WAY, PATH CYCLES WAS NOT UPDATED IF THIS INSTR WAS AFTER A INSERTED FUNC... */ if (inst_after_func == INSERT_FUNC) /* 4/4 */ inst_after_func = NO_FUNC; else { path_cycles = get_cycles(end_use, num_path_inst, only_fetch); if (compute_pd) path_pipeline[num_path_inst] = inf_inst_ptr -> number; /* 34. determine when each stage in this path is 1st occupied and who those pioneer occupants are */ for (stage = 0; stage < NUM_STAGES; stage++) { if (beg_occupant[stage] == NULL && /* update only if nec (2/23) */ begin_use[num_path_inst][stage] > 0) { path_beg_cycles[stage] = begin_use[num_path_inst][stage]; beg_occupant[stage] = inf_inst_ptr; } if (end_use[num_path_inst][stage] > 0) { end_occupant[stage] = inf_inst_ptr; /* it's me */ } } if (compute_pd) pipeline_cycles[num_path_inst] = path_cycles; if (verbose){ printc ("Time for path = %d\n", path_cycles); print_path_fields (path_ptr, tcase); printc ("Path's beg cycles are "); for (i = 0; i < NUM_STAGES; i++) printc ("%3d ", path_beg_cycles[i]); printc ("\n"); printc ("Path's beg occupants: "); for (i = 0; i < NUM_STAGES; i++) if (beg_occupant[i]) printc ("%3d ", beg_occupant[i] -> number); else printc (" "); printc ("\n"); printc ("Path's end cycles are "); } for (i = 0; i < NUM_STAGES; i++) { for (j = num_path_inst; j >= 0; j--) if (end_use[j][i] > 0) { /*BOOKMARK- MARKED- SEEMS TO BE A BIG BUG */ path_end_cycles[i] = path_cycles - end_use[j][i] + 1; //path_end_cycles[i] = end_use[j][i] ; break; } if (verbose) printc ("%3d ", path_end_cycles[i]); } for(i = 0;i = 0; j--) if (end_use[j][i] > 0) { if (verbose) printf("%d ",end_use[j][i]); break; } } if (verbose) { printf("\n"); printc ("\nPath's end occupants: "); for (i = 0; i < NUM_STAGES; i++) if (end_occupant[i]) printc ("%3d ", end_occupant[i] -> number); else printc (" "); printc ("\n"); } } /* end of else 4/4 */ #ifdef __SIB_DEBUG__ if( sib_verbose ) { /* printc( " --- \n" ) ; printc( "Time_Path::exit from FOR loop for instructions in basic block.\n" ) ; printc( "Timing for instruction : %s\n", inf_inst_ptr->inst ) ; printc( "path_cycles = %d \n", path_cycles ) ; printc( " --- \n\n" ) ; */ } #endif // __SIB_DEBUG__ } /* end of handling one instruction */ } /* end of timing all blocks in this path */ /*XXXXXXXXXXXXXXXXXXXXXXX*/ /*BY NOW ALL THE INSTRUCTIONS IN THE PATH HAVE BEEN TIME.. NOW TO PRINT THE INFO AND PASS IT OT THE PAT POINTER */ /* 35. now print ALL the inst's beg and end times for EACH stage in a table */ print_inst_time(begin_use, end_use, num_path_inst, path_pipeline); /* 36. Find # of occupied cycles for each stage - only needed in bc (6/13) */ if (tcase == BEST) { for (stage = 0; stage < NUM_STAGES; ++stage) { for (i = 0; i <= num_path_inst; ++i) { if (begin_use[i][stage] == 0) continue; path_ptr->occ_cycles[stage] += end_use[i][stage] - begin_use[i][stage] + 1; } } if (verbose){ print_array (path_ptr->occ_cycles, 0, NUM_STAGES, "occupied stages"); printc ("\n\n"); } } /* 37. We need to jump over inner loops or functions so not to wait too long while counting unnecessary cycles in step 38 (5/14) */ if (compute_pd) { cycle_jump = calc_cycle_jump (begin_use, end_use, pipeline_cycles, num_path_inst); /* 38. now print the pipeline */ /* The OR part of the following "if" statement adde by Sibin M on April 21, 2004. */ // #ifndef __SIB_DEBUG__ if ( verbose ) { /* #else if ( sib_verbose ) { */ // #endif // __SIB_DEBUG__ print_table_head(); for (cycle = 1; cycle <= path_cycles; cycle++) { if (cycle_jump && cycle_jump->from != NEVER && cycle == cycle_jump->from + 1) /* don't count thru inner cycles */ { cycle = cycle_jump->to; cycle_jump = cycle_jump->next; } all_empty = 1; for (stage = IF; stage < NUM_STAGES; stage++) { for (inst = 0; inst <= num_path_inst; inst++) { if (begin_use[inst][stage] <= cycle && end_use[inst][stage] >= cycle) { all_empty = 0; break; } if (begin_use[inst][stage] > cycle) break; if (inst == num_path_inst && end_use[inst][stage] < cycle) break; } } if (all_empty) continue; sprintf (tmp, "%6d: ", cycle); #ifdef INTERFACE strcat (pipe_diagram, tmp); #endif printc ("%8d: ", cycle); someone_fetched = -1; for (stage = IF; stage < NUM_STAGES; stage++) { /* look for an instruction that uses this stage */ for (inst = 0; inst <= num_path_inst; inst++) { if (begin_use[inst][stage] <= cycle && end_use[inst][stage] >= cycle) { sprintf (tmp, "%5d", path_pipeline[inst]); #ifdef INTERFACE strcat (pipe_diagram, tmp); #endif printc (tmp); if (stage == IF && begin_use[inst][stage] == cycle) someone_fetched = path_pipeline[inst]; break; } if (begin_use[inst][stage] > cycle) { /* no instruction occupies this cycle at this time */ sprintf (tmp, " "); #ifdef INTERFACE strcat (pipe_diagram, tmp); #endif printc (tmp); break; } if (inst == num_path_inst && end_use[inst][stage] < cycle) { /* we're past the last inst that uses this stage */ sprintf (tmp, " "); #ifdef INTERFACE strcat (pipe_diagram, tmp); #endif printc (tmp); break; } } } /* 39. off to the side of the pipeline, print the instruction (op & operands) which we are showing as just beginning its IF in this cycle */ if (someone_fetched >= 0) { for (bptr = path_ptr->path_list; bptr; bptr = bptr->next) { inf_bptr = Locate_Inf_Block (inf, bptr -> block_number); for (inf_inst_ptr = inf_bptr -> instruct_list; inf_inst_ptr; inf_inst_ptr = inf_inst_ptr -> next) { stats_inst_ptr = Locate_Stats_Inst (node_ptr -> instruct_list, inf_inst_ptr -> number); if (stats_inst_ptr == NULL) break; if (inf_inst_ptr -> number == someone_fetched) { printc (" %-5s %s %s %s", inf_inst_ptr -> inst, inf_inst_ptr -> op1.reg_list, inf_inst_ptr -> op2.reg_list, inf_inst_ptr -> op3.reg_list); break; } } } } (void) sprintf (tmp, "\n"); #ifdef INTERFACE (void) strcat (pipe_diagram, tmp); #endif printc (tmp); } } } /* end of `if (compute_pd)' */ /* 40. In the cached version, store this path's information. (4/25) */ /* * If there is a miss near end of prog, loading the program line may * survive cpu execution. 2/26/96 */ if (strcmp (node_ptr->name, "_main") == 0) poss_update (&path_cycles, last_word_loaded, "still loading at finis"); if (! overlap && tcase == WORST) { if (verbose) printc ("I'm adding %d to path cycles from non overlapping\n", extra_nonoverlap); path_cycles += extra_nonoverlap; /* add the extra here (8/28) */ } for (stage = 0; stage < NUM_STAGES; ++stage) { path_ptr->beg_occupant[stage] = beg_occupant[stage]; path_ptr->end_occupant[stage] = end_occupant[stage]; path_ptr->beg_cycles[stage] = path_beg_cycles[stage]; path_ptr->end_cycles[stage] = path_end_cycles[stage]; } path_ptr->path_cycles = path_cycles; path_ptr->extra_latency = extra_latency; path_ptr->inner_ff_trans = inner_ff_trans; path_ptr->icache_misses = icache_misses; path_ptr->dcache_misses = dcache_misses; // printf("ic %d dc %d stat %d\n",icache_misses, dcache_misses,data_status); path_ptr->extra_time = larger (path_ptr->shared_fm_time, larger (path_ptr->extra_fh_time, path_ptr->extra_fm_time) ); path_ptr->total_latency = (path_ptr->extra_latency + path_ptr->icache_misses + path_ptr->dcache_misses); if (naive_too) path_ptr->bc_pc_nai_time += num_path_inst + 1; if (verbose) printc ("At end of Time_Path, path_cycles = %d and " "path's extra time = %d\n", path_cycles, path_ptr->extra_time); if (path_ptr->first_line_delay != 0 && verbose) printc("path's first line delay = %d\n", path_ptr->first_line_delay); if (path_ptr->delay_after_fm != 0 && verbose) printc ("path's delay after fm = %d\n", path_ptr->delay_after_fm); if (path_ptr->last_line_delay != 0 && verbose) printc ("path's last line delay = %d\n", path_ptr->last_line_delay); if (verbose) { if (tcase == WORST) { printc ("In this path I encountered %d first misses.\n", path_ptr->fm_encountered); printc ("In this path I encountered %d first hits.\n", path_ptr->fh_encountered); } else printc ("For this path, best case extra time = %d\n", path_ptr->bc_extra_time); if (tcase == WORST && cache_only) printc ("path's wc pipe_nai_time = %d\n", path_ptr->wc_pipe_nai_time); else if (tcase == BEST && cache_only) printc ("path's bc pipe_nai_time = %d\n", path_ptr->bc_pipe_nai_time); } /* when stored in the path information, release times (not src needed times) will be relative to the end of the path (or node) (4/30) */ for (i = 0; i < NUMREGS; ++i) { if (release_time[i] != NEVER) path_ptr->release_time[i] = path_cycles - release_time[i] + 1; } /*MARKED - MORE IGNORED WRAP AROUND CACHE INFO */ #ifdef TRASH if (wrap_around) { if (verbose) { printc ("end of path: Loading cache line %d\n", cache_line); print_array (avail_time, 0, words_per_line, "avail times"); } /* store ending cache line loading info with the path (1/29/96) */ path_ptr->end_cache_line = cache_line; for (i = 0; i < words_per_line; ++i) { path_ptr->end_avail_time[i] = avail_time[i]; path_ptr->fm_end_avail_time[i] = fm_avail_time[i]; } path_ptr->last_fm = fm_inst; if (verbose) { print_array (path_ptr->beg_word_num, 0, words_per_line, "path's beg word numbers"); print_array (fm_avail_time, 0, words_per_line, "path's fm end avail time"); printc ("path in %s: fm inst was %d\n", node_ptr->name, fm_inst); } path_ptr->finish_load = largest_entry (path_ptr->end_avail_time, 0, words_per_line) - end_use[num_path_inst][IF]; poss_update (&path_ptr->finish_load, 0, "finish load can't be neg"); if (verbose) { printc ("path's loading finishes %d cycles after IF done\n", path_ptr->finish_load); /* print and store delay after fm list 3/18/96 */ print_delay_after_fm_list (delay_after_fm_head); path_ptr->delay_after_fm_head = delay_after_fm_head; } } #endif poss_update (&max_memory_count, memory_count, "memory_count"); if (compute_pd) { free (path_pipeline); free (pipeline_cycles); free (cycle_jump_head); memory_count -= 2 * tot_path_inst * sizeof (int) + sizeof (struct cycle_jump_node); } free (begin_use); free (end_use); free (dest_reg); free (dest_ready); free (stage_src1_needed); free (stage_src2_needed); free (inst_cond_code); free (inst_data_type); memory_count -= tot_path_inst * (2 * sizeof (row) + sizeof (string) + 5 * sizeof (int)); if (verbose){ printc ("fm_list at end of Time_Path\n"); Print_Cache_List(*fm_list); } #ifdef __SIB_DEBUG__ if( sib_verbose ) { /* printc( "---------------------------------------\n" ) ; printc( "Invocation Number : %d\n", invocation_num ) ; printc( "iterations_handled = %d \t max_iterations = %d \n", iterations_handled, max_iterations ) ; printc( "Exit : Time_Path()\n" ) ; printc( "---------------------------------------\n" ) ; */ } #endif // __SIB_DEBUG__ /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ #ifdef __PARAMETRIC_TA__ if (path_polynomial) { tmp_term = (struct term*)malloc(sizeof(struct term)); memset(tmp_term, 0, sizeof(struct term)); memory_count+=sizeof(struct term); tmp_term->power = 1; tmp_term->coefficient = path_cycles - child_overcount_time; add_term_structure(path_polynomial, tmp_term); if (call_site == 'a') { printf("Pretty printing from within Time_Path:\n"); pretty_print_list(path_polynomial); printf("\n"); } } #endif // __PARAMETRIC_TA__ /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ return path_cycles; } /*END */ /***************************************** * * FUNCTION NAME: Locate_Child_Loop_Header * * DESCRIPTION: * This function will search through all the sibling nodes of root in search * for a node with the same header block passed in. * If none is found, NULL is returned. * *****************************************/ struct tnode * Locate_Child_Loop_Header( int header, struct tnode* root ) { struct tnode *node_ptr = NULL; if (root) { for (node_ptr = root->child; node_ptr && node_ptr->loop_header_block != header; node_ptr = node_ptr->sibling) ; } return node_ptr; } /***************************************** * * FUNCTION NAME: Locate_Stats_Inst * *****************************************/ struct inst_cat * Locate_Stats_Inst( struct inst_cat* inst_list, int inst_number ) { struct inst_cat *inst_ptr; for (inst_ptr = inst_list; inst_ptr; inst_ptr = inst_ptr->next) { if (inst_ptr->inst_number == inst_number) { return inst_ptr; } } return NULL; } void error( char* msg ) { fprintf (stderr, "%s\n", msg); exit(1); } void poss_update( int* a, int b, char* message ) /* assign a = b if b > a */ { if (b > *a) { *a = b; if (*message && verbose) printc ("%s: new value is %d\n", message, *a); } } void print_array( int array[], int initial, int howmany, char* message ) { int i; if (*message) printc ("%s: ", message); for (i = initial; i < initial + howmany; ++i) printc ("%3d ", array[i]); printc ("\n"); } void global_info( char c ) { int i; printc ("%c\n", c); printc ("path_cycles = %d\n", path_cycles); printc ("path beg cycles: "); for (i = 0; i < NUM_STAGES; i++) { printc ("%3d ", path_beg_cycles[i]); } printc ("\n"); printc ("begin occupants: "); for (i = 0; i < NUM_STAGES; i++) { if (beg_occupant[i]) printc ("%3d ", beg_occupant[i] -> number); else printc (" "); } printc ("\n"); printc ("path end cycles: "); for (i = 0; i < NUM_STAGES; i++) { printc ("%3d ", path_end_cycles[i]); } printc ("\n"); printc ("end occupants : "); for (i = 0; i < NUM_STAGES; i++) { if (end_occupant[i]) printc ("%3d ", end_occupant[i] -> number); else printc (" "); } printc ("\n\n"); } void print_components( int iterations, int time, int extra_fh, int extra_fm, int shared_fm, int first_line_delay, int delay_after_fm, int last_line_delay, int fm_dead_cycles ) { printc ("loop's (union) total time after %d iterations is %d\n", iterations, time); printc ("loop's (union) total extra fh time after %d iterations is %d\n", iterations, extra_fh); printc ("loop's (union) total extra fm time after %d iterations is %d\n", iterations, extra_fm); printc ("loop's (union) total shared fm time after %d iterations is %d\n", iterations, shared_fm); #ifdef TRASH if (wrap_around) { printc ("loop's (union) total 1st line delay after %d iterations = %d\n", iterations, first_line_delay); printc ("loop's (union) total delay after fm after %d iterations = %d\n", iterations, delay_after_fm); printc ("loop's (union) tot last line delay after %d iterations = %d\n", iterations, last_line_delay); printc ("loop's (union) total fm dead cycles after %d iterations = %d\n", iterations, fm_dead_cycles); } #endif } void print_union( struct union_list_node * beg_head[], struct union_list_node * end_head[] ) { int stage; for (stage = 0; stage < NUM_STAGES; ++stage) { if (beg_head[stage] && beg_head[stage]->occupant) printc ("stage %d beg-cycle %2d occupant %2d\n", stage, beg_head[stage]->cycles, beg_head[stage]->occupant->number); if (end_head[stage] && end_head[stage]->occupant) printc ("stage %d end-cycle %2d occupant %2d\n", stage, end_head[stage]->cycles, end_head[stage]->occupant->number); } } void print_path_fields( struct loop_path_node* path_ptr, enum time_case tcase ) { if (tcase == WORST) { if (path_ptr->extra_fh_time != 0) { printc ("extra fh time for path = %d\n", path_ptr->extra_fh_time); } if (path_ptr->extra_fm_time != 0) { printc ("extra fm time for path = %d\n", path_ptr->extra_fm_time); } } else if (path_ptr->bc_extra_time != 0) { printc ("extra time for path = %d\n", path_ptr->bc_extra_time); } #ifdef TRASH if (wrap_around && path_ptr->first_line_delay != 0) { printc ("first line delay for path = %d\n", path_ptr->first_line_delay); } if (wrap_around && path_ptr->delay_after_fm != 0) { printc ("delay after fm for path = %d\n", path_ptr->delay_after_fm); } if (wrap_around && path_ptr->last_line_delay != 0) { printc ("last line delay for path = %d\n", path_ptr->last_line_delay); } if (wrap_around && path_ptr->fm_dead_cycles != 0) { printc ("fm dead cycles for path = %d\n", path_ptr->fm_dead_cycles); } #endif } void find_reg_delay( int* delay, int union_release_time[], int union_src_needed[] ) { int i; for (i = 0; i < NUMREGS; ++i) { if (union_release_time[i] != NEVER && union_src_needed[i] != NEVER && union_release_time[i] + union_src_needed[i] < 5) { printc ("register %d...", i); poss_update (delay, 5 - union_release_time[i] - union_src_needed[i], "union register delay"); } } if (verbose) printc ("union register delay is %d\n", *delay); } void read_delays() { int i; char answer[80], filename[12], title[80]; FILE *fp; strcpy (filename, "../ConfigFiles/trace.config"); if (!(fp = fopen (filename, "r"))) { error ("time.bin (read_delays) : can't find trace.config"); } if (!fgets(title, 80, fp)) error("error reading title in config file"); if (fscanf(fp, "Number of sets: %u\n", &numsets) != 1) error("error reading numsets in config file"); if (numsets > MAXNUMSETS) error("number of sets requested exceeds MAXNUMSETS"); if (fscanf(fp, "Set size: %u\n", &setsize) != 1) error("error reading setsize in config file"); if (setsize > MAXSETSIZE) error("set size requested exceeds MAXSETSIZE"); if (fscanf(fp, "Line size: %u\n", &linesize) != 1) error("error reading linesize in config file"); if ((linesize) & (linesize - 1)) error("line size must be a power of two"); if (linesize < 4) error ("line size must be at least 4 bytes (1 word)"); if (fscanf(fp, "All cache hits: %s\n", answer) != 1) error ("no decision about whether cache accesses are all hits"); all_cache_hits = (*answer == 'y') ? YES : NO; words_per_line = linesize / 4; memory_count += 2 * words_per_line * sizeof (int); delay_list = (int *) calloc (words_per_line, sizeof (int)); avail_time = (int *) calloc (words_per_line, sizeof (int)); delay_list_temp = delay_list; if (fscanf (fp, "Miss delay: %u", delay_list_temp) != 1) error ("error reading miss delays in config file"); if (all_cache_hits == YES) *delay_list_temp = 0; /* no delay */ for (i = 1; i < words_per_line; ++i) { if (fscanf (fp, "%u", ++delay_list_temp) != 1) error ("too few miss delays"); if (all_cache_hits == YES) *delay_list_temp = 0; } if (all_cache_hits == YES) miss_delay = 1; else miss_delay = delay_list[0]; if (fscanf (fp, "%u", &i) == 1) error ("too many miss delays"); fscanf (fp, "\n"); if (fscanf (fp, "CPU MHz: %u", &mhz) != 1) error ("error reading CPU MHz in config file"); fclose (fp); /*AMOL: What is this used for */ max_intraline_delay = delay_list[words_per_line - 1] - delay_list[0] - 1; max_jumping_delay = delay_list[words_per_line - 1] - delay_list[0]; } /* * Given a pointer to an instruction in some block, start counting the insts * until we reach the end. The result is this inst number if we were to count * from the end. (2/14/96) */ int count_from_end( struct instruct* instruct_ptr ) { struct instruct *ip; int howmany = 0; for (ip = instruct_ptr; ip; ip = ip->next) ++howmany; return howmany; } int *find_order_of_fill( int miss_word ) { int *order_of_fill, i, next_word; memory_count += words_per_line * sizeof (int); order_of_fill = (int *) calloc (words_per_line, sizeof (int)); order_of_fill[miss_word] = 0; next_word = (miss_word % 2 == 0) ? (miss_word + 1) : (miss_word - 1); order_of_fill[next_word] = 1; for (i = 2, next_word = miss_word + ((miss_word % 2 == 0) ? 2 : 1); i < words_per_line; ++i, ++next_word) { next_word %= words_per_line; order_of_fill[next_word] = i; } return order_of_fill; } int *get_inst_word_list( struct instruct* instruct_ptr ) { int *list, i; struct instruct *ip; memory_count += words_per_line * sizeof (int); list = (int *) calloc (words_per_line, sizeof (int)); for (ip = instruct_ptr, i = 0; ip; ip = ip->next, ++i) { list[i] = ip->prog_offset % words_per_line; } return list; } struct cache_block *find_fm_cblock( int fm_inst_number, struct cache_block* fm_list ) { struct cache_block *cblock; for (cblock = fm_list; cblock; cblock = cblock -> next) { if (cblock->inst_number == fm_inst_number) break; } if (! cblock) { if (verbose) printc ("I've been betrayed. I can't find it.\n"); } else { if (verbose) printc ("current cat for %d is %c\n", fm_inst_number, cblock->cat_list->worst_case); } return cblock; } void print_delay_after_fm_list( struct delay_after_fm_node* head ) { struct delay_after_fm_node *temp; printc ("delay after fm linked list:\n"); if (! head) { printc (" (list is empty)\n"); return; } for (temp = head; temp; temp = temp->next) printc (" inst = %d; cycles = %d\n", temp->inst, temp->cycles); } void print_inst_num_list( struct inst_num_node* head ) { struct inst_num_node *p; printc ("Inst num list: "); for (p = head; p; p = p->next) { printc ("%d ", p->num); } printc ("\n"); } void print_cache_info( struct loop_path_node* longest_path_ptr ) { extern void Print_Cache_List(); printc ("loop end cache line = %d\n", longest_path_ptr->end_cache_line); print_array (longest_path_ptr->fm_end_avail_time, 0, words_per_line, "loop fm end avail times"); print_array (longest_path_ptr->end_avail_time, 0, words_per_line, "loop end avail times"); printc ("path's first word num is %d\n", longest_path_ptr->first_word_num); printc ("path's beg prog line = %d\n", longest_path_ptr->beg_cache_line); printc ("path's end prog line = %d\n", longest_path_ptr->end_cache_line); Print_Cache_List(longest_path_ptr->worst_fm_list); } int need2calculate( struct loop_path_node* path_ptr, struct cache_block* fm_list, struct cache_block* fh_list, enum cache_access fm_status, enum cache_access fh_status ) { extern int cache_blocks_match(); /*if (verbose) return 1;*/ if (verbose) printc ("Checking need2calcuate for path %d\n", path_ptr->number); if (fm_status != path_ptr->prev_fm_status || fh_status != path_ptr->prev_fh_status) { if (verbose) printc ("cache_access status has changed since path was last evaluated\n"); return 1; } if (! cache_blocks_match (fm_list, path_ptr->prev_fm_list) || ! cache_blocks_match (fh_list, path_ptr->prev_fh_list)) { if (verbose) printc ("cache information has changed since path was last evaluated\n"); return 1; } if (verbose) printc ("No, we don't need to calculate this path's time\n"); return 0; } int incr_new_total_time( int prev_total, int* prev_end, int* this_beg, int* this_end ) { int i, new_total = 0, prop_beg[NUM_STAGES], prop_end[NUM_STAGES], diffs[NUM_STAGES]; if (verbose) printc ("entering incr_new_total_time\n"); for (i = 0; i < NUM_STAGES; ++i) { prop_beg[i] = this_beg[i] + largest_entry(prev_end, 0, NUM_STAGES); prop_end[i] = this_end[i] + largest_entry(prev_end, 0, NUM_STAGES); diffs[i] = prop_beg[i] - prev_end[i]; } /* Following change made by Sibin M on May 1, 2004. * Removed the +1 at the end of the calculation for new_total. * Is this correct though ???? */ new_total = largest_entry(prop_end, 0, NUM_STAGES) - smallest_entry(diffs, 0, NUM_STAGES) ; // + 1; /* Above change made by Sibin M on May 1, 2004. * Removed the +1 at the end of the calculation for new_total. * Is this correct though ???? */ if (prev_total > largest_entry(prev_end, 0, NUM_STAGES)) new_total += prev_total - largest_entry(prev_end, 0, NUM_STAGES); if (verbose) { print_array(prop_end, 0, NUM_STAGES, "prop_end"); print_array(diffs, 0, NUM_STAGES, "diffs"); printc ("\nnew total is %d\n", new_total); printc ("returning %d from incr_new_total_time\n", new_total - prev_total); } return new_total - prev_total; } /***************************************** * * FUNCTION NAME: compare_equations * *****************************************/ /*the compare equations function will look at the slope and y intercept of 2 lines and return the line which always gives the larger value of y. when the 2 lines intersect in the first quadrant, we need to use a tie breaker. function returns true if first line is greater... else false */ int compare_equations (int i1, int m1, int i2, int m2,int *COND) { int x = 0; *COND = false; if (m1>=m2 && i1>i2) return true; if (m1<=m2 && i1m2) return true; else return false; } if (m1>m2 && i1 MAX_LATENCY_RANGE) return false; } if (m1i2) { x = (i1-i2)/(m2-m1); if (x < MIN_LATENCY_RANGE) return false; else if (x > MAX_LATENCY_RANGE) return true; } *COND = true; fprintf(stderr, "DANGER... 2 close to tell !!\n m1 = %d i1 = %d m2 = %d i2 = %d and X = %d\n", m1, i1, m2, i2, x); printf("DANGER... 2 close to tell !!\n m1 = %d i1 = %d m2 = %d i2 = %d and X = %d\n", m1, i1, m2, i2, x); return true; } int poss_update_equations (int *i1, int *m1, int i2, int m2) { int condition; int cond1; cond1 = false; condition = compare_equations(i2, m2, *i1, *m1,&cond1); if (condition){ if (!cond1) { *i1 = i2; *m1 = m2; } else eq_merge(m1,i1,m2,i2); } return condition; } int eq_merge (int *m1, int *i1, int m2, int i2) { int i,j,k,l,t; float a,b,c; i = *i1 + (*m1)*MIN_LATENCY_RANGE; j = i2 + (m2)*MIN_LATENCY_RANGE; if (i b) return a; else return b; } /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */ #ifdef __PARAMETRIC_TA__ /* fix_poly_string - There are times when a polynomial expression contains * a register instead of the variable because of compiler optimizations. * Takes 3 parameters: a polynomial string; a list of variables separated * by commas that contains 1 or more registers; a list of original var names. * Basically, we have to replace each occurrence of each register with the * corresponding var name. 3/26/05 * For example, our params may be: "(25+55*r[11])*z" , "r[11], z" , "x, z" * and we want to return "(25+55*x)*z" */ char *fix_poly_string( char* old_poly_str, char* old_var_list, char* new_var_list, struct tnode* node) { static char *char_ptr; char new_poly_str[1000], temp_poly_str[1000], before_reg[1000], reg[10], after_reg[1000], temp_var_list[1000], token[100]; int reg_begin, reg_end, token_num, i; strcpy(temp_poly_str, old_poly_str); /* Keep working as long as we see a bracket in the string. */ while(strchr(temp_poly_str, '[')) { /* Find exactly where the (first) register is. */ reg_begin = strlen(temp_poly_str) - strlen(strchr(temp_poly_str, '[')) - 1; reg_end = strlen(temp_poly_str) - strlen(strchr(temp_poly_str, ']')); strncpy(before_reg, temp_poly_str, reg_begin); strncpy(reg, temp_poly_str + reg_begin, reg_end - reg_begin + 1); strcpy(after_reg, temp_poly_str + reg_end + 1); printf("before_reg = %s reg = %s after_reg = %s\n", before_reg, reg, after_reg); /* Now time to find the register in the "old" list, and the corresponding * variable name in the "new" (original!) vars list. */ strcpy(temp_var_list, old_var_list); strcpy(token, strtok(temp_var_list, " ,")); token_num = 0; while(strcmp(token, reg) != 0) { strcpy(token, strtok(0, " ,")); ++token_num; } /* Now we know how many tokens (token_num) to traverse in new_var_list. */ strcpy(temp_var_list, new_var_list); strcpy(token, strtok(temp_var_list, " ,")); for (i = 0; i < token_num; ++i) strcpy(token, strtok(0, " ,")); /* Now all we need to do is replace reg with token in the poly string. */ printf("fix_poly_string: Replacing %s with %s\n", reg, token); strcpy(new_poly_str, before_reg); strcat(new_poly_str, token); strcat(new_poly_str, after_reg); printf("new_poly_str = %s\n", new_poly_str); /* I think we should also change the polynomial structure with a similar * change of variable for register. Within the node->polynomial_worst_case * look for "variable" field of some * term_start[next...]->coefficient_ptr->term_start[next...] ... * that matches our "reg" and replace it with our "token". */ replace_reg(node->polynomial_worst_case, reg, token); /* So we can continue to next register (if any), copy back into temp. */ strcpy(temp_poly_str, new_poly_str); } char_ptr = &new_poly_str[0]; printf("fix_poly_string: returning %s\n", char_ptr); return char_ptr; } /* *Mike Root, 12-15-04. *This fuction creates a new C output file containing a copy of the input file. It adds a max function *and inserts an eval function before each level 1 loop. */ void createEvals(int argc, char* argv[], struct tnode* programTree, struct inf_func_element* inf) { FILE *inFilePtr, *outFilePtr; char inFileName[100],outFileName[100],cFileName[100],inFile[1000000]; char c, *inputLine, *char_ptr, *new_poly_string; char commentOne[] = "/*Insert_Eval_Functions_Here*/"; char commentTwo[] = "/*Begin_Original_C_Code_with_Eval_Calls*/"; char functionName[100] = "eval_function"; char originalVars[100] = "", originalVarsCopy[100] = "", originalVars2[100]; char variables[100], token[100], temp[100]; int containsEvals, fnameSize, inFileSize, i, inputLineNum, startLine, minStartLine, currentLine, loop_num=0, notDone=1, variableNum=1; struct tnode *node; struct block_element *block; struct loop_element *loop; struct loop_block_list *lbl; /*Get c file name and open file*/ strcpy(inFileName, argv[argc-1]); strcat(inFileName, ".c"); inFilePtr = fopen(inFileName,"r"); if(strstr(inFileName, "_eval")!= NULL) { /*when containsEvals == 1 than the input file already has the max expresion and eval expressions inserted into the orginal c file. All that needs to be updated are the polynomial expressions within each call to update schedule*/ containsEvals = 1; } else containsEvals = 0; /*Create output filename (output file name = input file name + "_eval.c") and open file for writing*/ strcpy(outFileName, argv[argc-1]); strcat(outFileName, "_eval.c"); outFilePtr = fopen(outFileName,"w"); /*fprintf(outFilePtr, "extern void updateSchedule();\n\n");*/ if(containsEvals == 0) { /*This inserts the max expression into the output file*/ fprintf(outFilePtr, "int max(a, b)\n int a;\n int b;\n{\n if(a>b)\n return a;\n else\n return b;\n}\n\n"); /*This line is included so that the output file compiles and does not complain of not recognizing updateSchedule()*/ fprintf(outFilePtr, "int updateSchedule(x)\n int x;\n{\n return 1;\n}\n\n"); fprintf(outFilePtr, "%s\n", commentOne); for(node=programTree; node; node=node->next) { if(node->loop_paths->nest_level ==1) { loop_num++; start_poly_string(node->polynomial_worst_case); collect_variables(node->polynomial_worst_case, 0); fprintf(outFilePtr, "eval_function%i(%s)", loop_num, variables_string); collect_variables(node->polynomial_worst_case, 1); fprintf(outFilePtr, "\n%s\n{\n updateSchedule(%s);\n}\n", variables_string, poly_string); collect_variables(node->polynomial_worst_case, 0); fprintf(outFilePtr, "/*\n%s\n*/\n", variables_string); } } fprintf(outFilePtr, "%s\n", commentTwo); /*Find all loops at nest level 1 and insert eval fuction code before loop*/ currentLine = 1; loop_num = 0; for(node=programTree; node; node=node->next) { inf = node->inf_ptr; if(node->loop_paths->nest_level ==1) { loop_num++; lbl = node->loop_paths->path->path_list; block = Locate_Inf_Block(inf, lbl->block_number); minStartLine = block->src_start; /*loop through all blocks in a loop and find the min src code line*/ for (lbl = node->loop_paths->path->path_list; lbl; lbl = lbl->next) { block = Locate_Inf_Block(inf, lbl->block_number); startLine = block->src_start; if(startLine < minStartLine) minStartLine = startLine; } /*At some point we need to distinguish the parent*/ for(i=currentLine;ipolynomial_worst_case); collect_variables(node->polynomial_worst_case, 2); /*fprintf(outFilePtr, "poly_string: %s !!!\n", poly_string);*/ fprintf(outFilePtr, " eval_function%i(%s);\n", loop_num, variables_string); currentLine = minStartLine; printf("I am '%s' parent is '%s' header = %d start line = %d\n", node->name, node->parent->name, node->loop_header_block, minStartLine); } } } else /* and now, the case where input file was already ..._eval.c */ { /* read the input source file up to the point where we inserted the first * eval function (i.e. eval_function1) */ while(notDone) { inputLine = fgets(inFile, 10000, inFilePtr); if(strstr(inputLine, commentOne) != NULL) notDone = 0; fprintf(outFilePtr, "%s", inputLine); } for(node=programTree; node; node=node->next) { /*for each loop at nest level one we are re-writing the eval_function *after re-running the timing analyzer in order to update the worstcase *polynomial being passed to updateSchedule*/ if(node->loop_paths->nest_level == 1) { loop_num++; start_poly_string(node->polynomial_worst_case); collect_variables(node->polynomial_worst_case, 0); if(strstr(variables_string, "[")!=NULL) { printf("\nFound brackets\n\n"); temp[0] = '\0'; itoa(loop_num, temp); strcat(functionName, temp); printf("\n\nCreateEvals: functionName = %s\n\n", functionName); /* Next, continue reading from the source file - find the * "eval" function that corresponds to the node that had a * bracket in its polynomial that we just found. 3/25/05 */ notDone=1; while(notDone) { inputLine = fgets(inFile, 10000, inFilePtr); if(strstr(inputLine, functionName) != NULL) { notDone = 0; /* We want to avoid printing brackets in the output .c file, * and fortunately we can copy the parameters as they were in * the input ..._eval.c source file */ fprintf(outFilePtr, "%s", inputLine); inputLine = fgets(inFile, 10000, inFilePtr); fprintf(outFilePtr, "%s", inputLine); /* go to comment after eval function to read original var list */ for(i=0;i<4;i++) { inputLine = fgets(inFile, 10000, inFilePtr); } inputLine = fgets(inFile, 10000, inFilePtr); strcpy(originalVars, inputLine); printf("OriginalVars = %s\n", originalVars); inputLine = fgets(inFile, 10000, inFilePtr); } } /* Note that at this point the "variables_string" contains a * variable with a bracket as in: "r[11], z". e.g. We want "x, z". * We tokenize the "variables" string to find the bracketed one. */ variableNum = 1; strcpy(variables, variables_string); printf("CreateEvals: Variables = %s\n", variables); strcpy(token, strtok(variables, ",")); printf("CreateEvals: Token = %s\n", token); /* Now let's find the bracketed variable (register name) in our * list of variables. */ while(token!=NULL) { if(strstr(token, "[")!=NULL) { printf("CreateEvals: Token contains bracket.\n"); /************************************************ * I think it's important at this point to replace the * register with its true name in the polynomial. * And I think we need to find and replace this variable name * before we write the line of code containing the polynomial. * I thought of a new way to replace the register. 3/26/05 * It involves a new function called fix_poly_string. * I'm going to print out new_poly_string instead of poly_string ************************************************/ printf("We want to re-write the expr %s by replacing the " "registers from the list %s with the corresponding " "variable names from %s\n", poly_string, variables_string, originalVars); new_poly_string = fix_poly_string(poly_string, variables_string, originalVars, node); /* It looks like something happens to originalVars, because * in testing it seems to become the empty string (?) 3/27/05 * So I'm going to save them into a new string and print that * out in a comment after the call to updateSchedule. */ strcpy(originalVars2, originalVars); printf("Variable Number is: %i\n", variableNum); strcpy(originalVarsCopy, originalVars); set_old_variable(originalVarsCopy, variableNum); replace_var_polynomial(node->polynomial_worst_case, token); printf("old poly: %s\n", poly_string); start_poly_string(node->polynomial_worst_case); printf("new poly: %s\n", poly_string); } fprintf(outFilePtr, "{\n updateSchedule(%s);\n}\n", new_poly_string); /* Changed to print originalVars2 because originalVars for * some reason empty - see above comment. */ fprintf(outFilePtr, "/***\n%s\n***/\n", originalVars2); char_ptr = strtok(NULL, ","); if (! char_ptr) break; strcpy(token, char_ptr); printf("CreateEvals: Token = %s, Variables = %s\n", token, variables); variableNum++; } } else { fprintf(outFilePtr, "eval_function%i(%s)", loop_num, variables_string); collect_variables(node->polynomial_worst_case, 1); fprintf(outFilePtr, "\n%s\n{\n updateSchedule(%s);\n}\n", variables_string, poly_string); /*move past eval_function in the input file, each eval function contains 8 lines of code including the comment preceding the eval function*/ for(i=0;i<8;i++) { inputLine = fgets(inFile, 10000, inFilePtr); } } } } fprintf(outFilePtr, "%s\n", commentTwo); } /*Read in the remainder of the input file and copy it to out file*/ while (fscanf(inFilePtr,"%c",&c) != -1) { inFile[inFileSize++] = c; } fprintf(outFilePtr, "%s",inFile); fclose(outFilePtr); } #endif // __PARAMETRIC_TA__ /* Added by Sibin M on Mar 31, 2005. * Adding handling of parametric TA. */