// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
 *
 * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

#include "mali_kbase_ipa_counter_common_csf.h"
#include "ipa/mali_kbase_ipa_debugfs.h"

#define DEFAULT_SCALING_FACTOR 5

/* If the value of GPU_ACTIVE is below this, use the simple model
 * instead, to avoid extrapolating small amounts of counter data across
 * large sample periods.
 */
#define DEFAULT_MIN_SAMPLE_CYCLES 10000

/* The sampling interval is typically expected to be less than 100 ms, so
 * 5 seconds is a reasonable upper limit for the time gap between two
 * samples.
 */
#define MAX_SAMPLE_INTERVAL_MS ((s64)5000)

/* The maximum increment expected for a counter value during a sampling
 * interval is derived assuming:
 * - a max sampling interval of 1 second.
 * - a max GPU frequency of 2 GHz.
 * - a max number of cores of 32.
 * - a max increment of 4 per clock cycle in a per-core counter value.
 *
 * So max increment = 2 * 10^9 * 32 * 4 = ~2^38.
 * If a counter increases by an amount greater than this value, then an error
 * will be returned and the simple power model will be used.
 */
#define MAX_COUNTER_INCREMENT (((u64)1 << 38) - 1)

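/**
 * kbase_ipa_add_saturate() - Add two signed 64-bit values with saturation
 * @a: First operand
 * @b: Second operand
 *
 * Return: sum of the two operands, saturated to S64_MAX or S64_MIN instead
 * of wrapping around on overflow.
 */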
static inline s64 kbase_ipa_add_saturate(s64 a, s64 b)
{
	s64 rtn;

	if (a > 0 && (S64_MAX - a) < b)
		rtn = S64_MAX;
	else if (a < 0 && (S64_MIN - a) > b)
		rtn = S64_MIN;
	else
		rtn = a + b;

	return rtn;
}

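/**
 * kbase_ipa_group_energy() - Calculate the energy for an IPA group
 * @coeff:         Coefficient of the counter, as configured for the model
 * @counter_value: Accumulated value of the counter
 *
 * Return: Energy estimate for the group, as the product of the counter
 * value and its coefficient.
 */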
static s64 kbase_ipa_group_energy(s32 coeff, u64 counter_value)
{
	/* Range: 0 <= counter_value < 2^38 */

	/* Range: -2^59 < ret < 2^59 (as -2^21 < coeff < 2^21) */
	return counter_value * (s64)coeff;
}

/**
 * kbase_ipa_attach_ipa_control() - Register with kbase_ipa_control
 * @model_data: Pointer to counter model data
 *
 * Register the IPA counter model as a client of kbase_ipa_control, which
 * provides an interface to retrieve the accumulated value of hardware
 * counters to calculate energy consumption.
 *
 * Return: 0 on success, or an error code.
 */
static int
kbase_ipa_attach_ipa_control(struct kbase_ipa_counter_model_data *model_data)
{
	struct kbase_device *kbdev = model_data->kbdev;
	struct kbase_ipa_control_perf_counter *perf_counters;
	u32 cnt_idx = 0;
	int err;
	size_t i;

	/* The value of the GPU_ACTIVE counter also needs to be queried. It is
	 * required for the normalization of the top-level and shader core
	 * counters.
	 */
	model_data->num_counters = 1 + model_data->num_top_level_cntrs +
				   model_data->num_shader_cores_cntrs;

	perf_counters = kcalloc(model_data->num_counters,
				sizeof(*perf_counters), GFP_KERNEL);

	if (!perf_counters) {
		dev_err(kbdev->dev,
			"Failed to allocate memory for perf_counters array");
		return -ENOMEM;
	}

	/* Fill in the description for the GPU_ACTIVE counter, which is always
	 * needed, as mentioned above, regardless of the energy model used by
	 * the CSF GPUs.
	 */
	perf_counters[cnt_idx].type = KBASE_IPA_CORE_TYPE_CSHW;
	perf_counters[cnt_idx].idx = GPU_ACTIVE_CNT_IDX;
	perf_counters[cnt_idx].gpu_norm = false;
	perf_counters[cnt_idx].scaling_factor = 1;
	cnt_idx++;

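	/* Fill in the descriptions for the top-level counters. */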
	for (i = 0; i < model_data->num_top_level_cntrs; ++i) {
		const struct kbase_ipa_counter *counter =
			&model_data->top_level_cntrs_def[i];

		perf_counters[cnt_idx].type = counter->counter_block_type;
		perf_counters[cnt_idx].idx = counter->counter_block_offset;
		perf_counters[cnt_idx].gpu_norm = false;
		perf_counters[cnt_idx].scaling_factor = 1;
		cnt_idx++;
	}

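	/* Fill in the descriptions for the shader cores counters. */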
	for (i = 0; i < model_data->num_shader_cores_cntrs; ++i) {
		const struct kbase_ipa_counter *counter =
			&model_data->shader_cores_cntrs_def[i];

		perf_counters[cnt_idx].type = counter->counter_block_type;
		perf_counters[cnt_idx].idx = counter->counter_block_offset;
		perf_counters[cnt_idx].gpu_norm = false;
		perf_counters[cnt_idx].scaling_factor = 1;
		cnt_idx++;
	}

	err = kbase_ipa_control_register(kbdev, perf_counters,
					 model_data->num_counters,
					 &model_data->ipa_control_client);
	if (err)
		dev_err(kbdev->dev,
			"Failed to register IPA with kbase_ipa_control");

	kfree(perf_counters);
	return err;
}

/**
 * kbase_ipa_detach_ipa_control() - De-register from kbase_ipa_control.
 * @model_data: Pointer to counter model data
 */
static void
kbase_ipa_detach_ipa_control(struct kbase_ipa_counter_model_data *model_data)
{
	if (model_data->ipa_control_client) {
		kbase_ipa_control_unregister(model_data->kbdev,
					     model_data->ipa_control_client);
		model_data->ipa_control_client = NULL;
	}
}

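/**
 * calculate_coeff() - Calculate the dynamic power coefficient for a block
 * @model_data:     Pointer to counter model data
 * @cnt_defs:       Array of definitions of the counters used by the model
 * @num_counters:   Number of counters in the @cnt_defs array
 * @counter_coeffs: Array of coefficients, one per counter in @cnt_defs
 * @counter_values: Array of accumulated counter values, one per counter
 * @active_cycles:  Number of GPU active cycles in the sampling interval
 * @coeffp:         Location where the dynamic power coefficient, in
 *                  pW/(Hz V^2), is stored
 *
 * Return: 0 on success, or -ERANGE if a counter incremented by more than
 * MAX_COUNTER_INCREMENT during the sampling interval.
 */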
static int calculate_coeff(struct kbase_ipa_counter_model_data *model_data,
			   const struct kbase_ipa_counter *const cnt_defs,
			   size_t num_counters, s32 *counter_coeffs,
			   u64 *counter_values, u32 active_cycles, u32 *coeffp)
{
	u64 coeff = 0, coeff_mul = 0;
	s64 total_energy = 0;
	size_t i;

	/* Range for the 'counter_value' is [0, 2^38)
	 * Range for the 'coeff' is [-2^21, 2^21]
	 * So range for the 'group_energy' is [-2^59, 2^59) and range for the
	 * 'total_energy' is +/- 2^59 * number of IPA groups (~16), i.e.
	 * [-2^63, 2^63).
	 */
	for (i = 0; i < num_counters; i++) {
		s32 coeff = counter_coeffs[i];
		u64 counter_value = counter_values[i];
		s64 group_energy = kbase_ipa_group_energy(coeff, counter_value);

		if (counter_value > MAX_COUNTER_INCREMENT) {
			dev_warn(model_data->kbdev->dev,
				 "Increment in counter %s more than expected",
				 cnt_defs[i].name);
			return -ERANGE;
		}

		total_energy =
			kbase_ipa_add_saturate(total_energy, group_energy);
	}

	/* Range: 0 <= coeff < 2^63 */
	if (total_energy >= 0)
		coeff = total_energy;
	else
		dev_dbg(model_data->kbdev->dev,
			"Computed energy value is negative: %lld",
			total_energy);

	/* Range: 0 <= coeff < 2^63 (because active_cycles >= 1). However, this
	 * can be constrained further: the value of counters that are being
	 * used for dynamic power estimation can only increment by about 128
	 * maximum per clock cycle. This is because the max number of shader
	 * cores is expected to be 32 (and the max number of L2 slices is
	 * expected to be 8), and some counters (per shader core) like
	 * SC_BEATS_RD_TEX_EXT & SC_EXEC_STARVE_ARITH can increment by 4 every
	 * clock cycle.
	 * Each "beat" is defined as 128 bits and each shader core can
	 * (currently) do 512 bits read and 512 bits write to/from the L2
	 * cache per cycle, so the SC_BEATS_RD_TEX_EXT counter can increment
	 * [0, 4] per shader core per cycle.
	 * We can thus write the range of 'coeff' in terms of active_cycles:
	 *
	 * coeff = SUM(coeffN * counterN * num_cores_for_counterN)
	 * coeff <= SUM(coeffN * counterN) * max_cores
	 * coeff <= num_IPA_groups * max_coeff * max_counter * max_cores
	 * (substitute max_counter = 2^2 * active_cycles)
	 * coeff <= num_IPA_groups * max_coeff * 2^2 * active_cycles * max_cores
	 * coeff <= 2^4 * 2^21 * 2^2 * active_cycles * 2^5
	 * coeff <= 2^32 * active_cycles
	 *
	 * So after the division: 0 <= coeff <= 2^32
	 */
	coeff = div_u64(coeff, active_cycles);

	/* Not all models were derived at the same reference voltage. Voltage
	 * scaling is done by multiplying by V^2, so we need to *divide* by
	 * Vref^2 here. The division is done in two steps, multiplying by
	 * 1000 each time to preserve precision.
	 * Range: 0 <= coeff <= 2^35
	 */
	coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1));
	/* Range: 0 <= coeff <= 2^38 */
	coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1));

	/* Scale by the user-specified integer factor.
	 * Range: 0 <= coeff_mul < 2^43
	 */
	coeff_mul = coeff * model_data->scaling_factor;

	/* The power models have results with units
	 * mW/(MHz V^2), i.e. nW/(Hz V^2). With a precision of 1/1000000, this
	 * becomes fW/(Hz V^2), which are the units of coeff_mul. However,
	 * kbase_scale_dynamic_power() expects units of pW/(Hz V^2), so divide
	 * by 1000.
	 * Range: 0 <= coeff_mul < 2^33
	 */
	coeff_mul = div_u64(coeff_mul, 1000u);

	/* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */
	*coeffp = clamp(coeff_mul, (u64)0, (u64)1 << 16);

	return 0;
}

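/**
 * kbase_ipa_counter_dynamic_coeff() - Calculate dynamic power coefficients
 * @model:  Pointer to the IPA model
 * @coeffp: Array where the coefficients for the top-level and shader cores
 *          blocks are stored
 *
 * Query the accumulated counter values from kbase_ipa_control and derive a
 * dynamic power coefficient for the top-level and shader cores blocks.
 *
 * Return: 0 on success, or an error code if the counter data cannot be
 * relied upon for the estimation.
 */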
int kbase_ipa_counter_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
{
	struct kbase_ipa_counter_model_data *model_data =
		(struct kbase_ipa_counter_model_data *)model->model_data;
	struct kbase_device *kbdev = model->kbdev;
	s32 *counter_coeffs_p = model_data->counter_coeffs;
	u64 *cnt_values_p = model_data->counter_values;
	const u64 num_counters = model_data->num_counters;
	u32 active_cycles;
	ktime_t now, diff;
	s64 diff_ms;
	int ret;

	lockdep_assert_held(&kbdev->ipa.lock);

	/* The last argument is supposed to be a pointer to the location that
	 * will store the time for which the GPU has been in protected mode
	 * since the last query. It can be passed as NULL here, as the counter
	 * model itself will not be used when the GPU enters protected mode;
	 * IPA is supposed to switch to the simple power model instead.
	 */
	ret = kbase_ipa_control_query(kbdev,
				      model_data->ipa_control_client,
				      cnt_values_p, num_counters, NULL);
	if (WARN_ON(ret))
		return ret;

	now = ktime_get_raw();
	diff = ktime_sub(now, kbdev->ipa.last_sample_time);
	diff_ms = ktime_to_ms(diff);

	kbdev->ipa.last_sample_time = now;

	/* The counter values cannot be relied upon if the sampling interval
	 * was too long. Typically this will happen when the polling is
	 * started after the temperature has risen above a certain trip point.
	 * After that, regular calls at a 25-100 ms interval are expected.
	 */
	if (diff_ms > MAX_SAMPLE_INTERVAL_MS) {
		dev_dbg(kbdev->dev,
			"Last sample was taken %lld milliseconds ago",
			diff_ms);
		return -EOVERFLOW;
	}

	/* Range: 0 (GPU not used at all) to the max sampling interval, say
	 * 1 second, times the max GPU frequency (GPU 100% utilized).
	 * 0 <= active_cycles <= 1 * ~2GHz
	 * 0 <= active_cycles < 2^31
	 */
	if (*cnt_values_p > U32_MAX) {
		dev_warn(kbdev->dev,
			 "Increment in GPU_ACTIVE counter more than expected");
		return -ERANGE;
	}

	active_cycles = (u32)*cnt_values_p;

	/* If the value of active_cycles is less than the threshold, then
	 * return an error so that the IPA framework can approximate using the
	 * cached simple model results instead. This may be more accurate
	 * than extrapolating using a very small counter dump.
	 */
	if (active_cycles < (u32)max(model_data->min_sample_cycles, 0))
		return -ENODATA;

	/* Range: 1 <= active_cycles < 2^31 */
	active_cycles = max(1u, active_cycles);

	cnt_values_p++;
	ret = calculate_coeff(model_data, model_data->top_level_cntrs_def,
			      model_data->num_top_level_cntrs,
			      counter_coeffs_p, cnt_values_p, active_cycles,
			      &coeffp[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]);
	if (ret)
		return ret;

	cnt_values_p += model_data->num_top_level_cntrs;
	counter_coeffs_p += model_data->num_top_level_cntrs;
	ret = calculate_coeff(model_data, model_data->shader_cores_cntrs_def,
			      model_data->num_shader_cores_cntrs,
			      counter_coeffs_p, cnt_values_p, active_cycles,
			      &coeffp[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]);

	return ret;
}

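/**
 * kbase_ipa_counter_reset_data() - Reset the accumulated counter data
 * @model: Pointer to the IPA model
 *
 * Query (and discard) the counter values accumulated since the last query,
 * so that the next estimation starts from a fresh baseline.
 */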
void kbase_ipa_counter_reset_data(struct kbase_ipa_model *model)
{
	struct kbase_ipa_counter_model_data *model_data =
		(struct kbase_ipa_counter_model_data *)model->model_data;
	u64 *cnt_values_p = model_data->counter_values;
	const u64 num_counters = model_data->num_counters;
	int ret;

	lockdep_assert_held(&model->kbdev->ipa.lock);

	ret = kbase_ipa_control_query(model->kbdev,
				      model_data->ipa_control_client,
				      cnt_values_p, num_counters, NULL);
	WARN_ON(ret);
}

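/**
 * kbase_ipa_counter_common_model_init() - Initialize the counter model
 * @model:                  Pointer to the IPA model
 * @top_level_cntrs_def:    Array of counter definitions for the top-level
 *                          block
 * @num_top_level_cntrs:    Number of entries in @top_level_cntrs_def
 * @shader_cores_cntrs_def: Array of counter definitions for the shader
 *                          cores block
 * @num_shader_cores_cntrs: Number of entries in @shader_cores_cntrs_def
 * @reference_voltage:      Voltage, in mV, of the operating point used
 *                          when deriving the power model coefficients
 *
 * Register the model parameters (one coefficient per counter, plus "scale",
 * "min_sample_cycles" and "reference_voltage") and attach to
 * kbase_ipa_control.
 *
 * Return: 0 on success, or an error code.
 */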
int kbase_ipa_counter_common_model_init(struct kbase_ipa_model *model,
		const struct kbase_ipa_counter *top_level_cntrs_def,
		size_t num_top_level_cntrs,
		const struct kbase_ipa_counter *shader_cores_cntrs_def,
		size_t num_shader_cores_cntrs,
		s32 reference_voltage)
{
	struct kbase_ipa_counter_model_data *model_data;
	s32 *counter_coeffs_p;
	int err = 0;
	size_t i;

	if (!model || !top_level_cntrs_def || !shader_cores_cntrs_def ||
	    !num_top_level_cntrs || !num_shader_cores_cntrs)
		return -EINVAL;

	model_data = kzalloc(sizeof(*model_data), GFP_KERNEL);
	if (!model_data)
		return -ENOMEM;

	model_data->kbdev = model->kbdev;

	model_data->top_level_cntrs_def = top_level_cntrs_def;
	model_data->num_top_level_cntrs = num_top_level_cntrs;

	model_data->shader_cores_cntrs_def = shader_cores_cntrs_def;
	model_data->num_shader_cores_cntrs = num_shader_cores_cntrs;

	model->model_data = (void *)model_data;

	counter_coeffs_p = model_data->counter_coeffs;

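	/* Register a tunable model parameter for each counter, seeded with
	 * the counter's default coefficient value.
	 */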
	for (i = 0; i < model_data->num_top_level_cntrs; ++i) {
		const struct kbase_ipa_counter *counter =
			&model_data->top_level_cntrs_def[i];

		*counter_coeffs_p = counter->coeff_default_value;

		err = kbase_ipa_model_add_param_s32(
			model, counter->name, counter_coeffs_p, 1, false);
		if (err)
			goto exit;

		counter_coeffs_p++;
	}

	for (i = 0; i < model_data->num_shader_cores_cntrs; ++i) {
		const struct kbase_ipa_counter *counter =
			&model_data->shader_cores_cntrs_def[i];

		*counter_coeffs_p = counter->coeff_default_value;

		err = kbase_ipa_model_add_param_s32(
			model, counter->name, counter_coeffs_p, 1, false);
		if (err)
			goto exit;

		counter_coeffs_p++;
	}

	model_data->scaling_factor = DEFAULT_SCALING_FACTOR;
	err = kbase_ipa_model_add_param_s32(
		model, "scale", &model_data->scaling_factor, 1, false);
	if (err)
		goto exit;

	model_data->min_sample_cycles = DEFAULT_MIN_SAMPLE_CYCLES;
	err = kbase_ipa_model_add_param_s32(model, "min_sample_cycles",
					    &model_data->min_sample_cycles, 1,
					    false);
	if (err)
		goto exit;

	model_data->reference_voltage = reference_voltage;
	err = kbase_ipa_model_add_param_s32(model, "reference_voltage",
					    &model_data->reference_voltage, 1,
					    false);
	if (err)
		goto exit;

	err = kbase_ipa_attach_ipa_control(model_data);

exit:
	if (err) {
		kbase_ipa_model_param_free_all(model);
		kfree(model_data);
	}
	return err;
}
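/**
 * kbase_ipa_counter_common_model_term() - Terminate the counter model
 * @model: Pointer to the IPA model
 *
 * De-register from kbase_ipa_control and free the counter model data.
 */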
void kbase_ipa_counter_common_model_term(struct kbase_ipa_model *model)
{
	struct kbase_ipa_counter_model_data *model_data =
		(struct kbase_ipa_counter_model_data *)model->model_data;

	kbase_ipa_detach_ipa_control(model_data);
	kfree(model_data);
}
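
/* Usage sketch (illustrative only): a CSF platform model would typically
 * define its counter tables and pass them to
 * kbase_ipa_counter_common_model_init() from its init hook, and call
 * kbase_ipa_counter_common_model_term() from its term hook. The counter
 * names, block offsets and the reference voltage below are hypothetical
 * placeholders, not real hardware definitions; only the field names and
 * KBASE_IPA_CORE_TYPE_CSHW are taken from this file:
 *
 *	static const struct kbase_ipa_counter example_top_level_cntrs[] = {
 *		{ .name = "example_top_level_cnt",
 *		  .coeff_default_value = 100,
 *		  .counter_block_offset = EXAMPLE_CNT_OFFSET,
 *		  .counter_block_type = KBASE_IPA_CORE_TYPE_CSHW },
 *	};
 *
 *	static const struct kbase_ipa_counter example_shader_cores_cntrs[] = {
 *		{ .name = "example_shader_cores_cnt",
 *		  .coeff_default_value = 100,
 *		  .counter_block_offset = EXAMPLE_CNT_OFFSET,
 *		  .counter_block_type = KBASE_IPA_CORE_TYPE_CSHW },
 *	};
 *
 *	static int example_model_init(struct kbase_ipa_model *model)
 *	{
 *		return kbase_ipa_counter_common_model_init(model,
 *			example_top_level_cntrs,
 *			ARRAY_SIZE(example_top_level_cntrs),
 *			example_shader_cores_cntrs,
 *			ARRAY_SIZE(example_shader_cores_cntrs),
 *			1000);
 *	}
 */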